1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66 #include <task_swapper.h>
67 #include <mach_assert.h>
68 #include <libkern/OSAtomic.h>
69
70 #include <mach/kern_return.h>
71 #include <mach/port.h>
72 #include <mach/vm_attributes.h>
73 #include <mach/vm_param.h>
74 #include <mach/vm_behavior.h>
75 #include <mach/vm_statistics.h>
76 #include <mach/memory_object.h>
77 #include <mach/mach_vm.h>
78 #include <machine/cpu_capabilities.h>
79 #include <mach/sdt.h>
80
81 #include <kern/assert.h>
82 #include <kern/counters.h>
83 #include <kern/kalloc.h>
84 #include <kern/zalloc.h>
85
86 #include <vm/cpm.h>
87 #include <vm/vm_init.h>
88 #include <vm/vm_fault.h>
89 #include <vm/vm_map.h>
90 #include <vm/vm_object.h>
91 #include <vm/vm_page.h>
92 #include <vm/vm_pageout.h>
93 #include <vm/vm_kern.h>
94 #include <ipc/ipc_port.h>
95 #include <kern/sched_prim.h>
96 #include <kern/misc_protos.h>
97 #include <machine/db_machdep.h>
98 #include <kern/xpr.h>
99
100 #include <mach/vm_map_server.h>
101 #include <mach/mach_host_server.h>
102 #include <vm/vm_protos.h>
103 #include <vm/vm_purgeable_internal.h>
104
105 #include <vm/vm_protos.h>
106 #include <vm/vm_shared_region.h>
107 #include <vm/vm_map_store.h>
108
109 /* Internal prototypes
110 */
111
112 static void vm_map_simplify_range(
113 vm_map_t map,
114 vm_map_offset_t start,
115 vm_map_offset_t end); /* forward */
116
117 static boolean_t vm_map_range_check(
118 vm_map_t map,
119 vm_map_offset_t start,
120 vm_map_offset_t end,
121 vm_map_entry_t *entry);
122
123 static vm_map_entry_t _vm_map_entry_create(
124 struct vm_map_header *map_header, boolean_t map_locked);
125
126 static void _vm_map_entry_dispose(
127 struct vm_map_header *map_header,
128 vm_map_entry_t entry);
129
130 static void vm_map_pmap_enter(
131 vm_map_t map,
132 vm_map_offset_t addr,
133 vm_map_offset_t end_addr,
134 vm_object_t object,
135 vm_object_offset_t offset,
136 vm_prot_t protection);
137
138 static void _vm_map_clip_end(
139 struct vm_map_header *map_header,
140 vm_map_entry_t entry,
141 vm_map_offset_t end);
142
143 static void _vm_map_clip_start(
144 struct vm_map_header *map_header,
145 vm_map_entry_t entry,
146 vm_map_offset_t start);
147
148 static void vm_map_entry_delete(
149 vm_map_t map,
150 vm_map_entry_t entry);
151
152 static kern_return_t vm_map_delete(
153 vm_map_t map,
154 vm_map_offset_t start,
155 vm_map_offset_t end,
156 int flags,
157 vm_map_t zap_map);
158
159 static kern_return_t vm_map_copy_overwrite_unaligned(
160 vm_map_t dst_map,
161 vm_map_entry_t entry,
162 vm_map_copy_t copy,
163 vm_map_address_t start);
164
165 static kern_return_t vm_map_copy_overwrite_aligned(
166 vm_map_t dst_map,
167 vm_map_entry_t tmp_entry,
168 vm_map_copy_t copy,
169 vm_map_offset_t start,
170 pmap_t pmap);
171
172 static kern_return_t vm_map_copyin_kernel_buffer(
173 vm_map_t src_map,
174 vm_map_address_t src_addr,
175 vm_map_size_t len,
176 boolean_t src_destroy,
177 vm_map_copy_t *copy_result); /* OUT */
178
179 static kern_return_t vm_map_copyout_kernel_buffer(
180 vm_map_t map,
181 vm_map_address_t *addr, /* IN/OUT */
182 vm_map_copy_t copy,
183 boolean_t overwrite);
184
185 static void vm_map_fork_share(
186 vm_map_t old_map,
187 vm_map_entry_t old_entry,
188 vm_map_t new_map);
189
190 static boolean_t vm_map_fork_copy(
191 vm_map_t old_map,
192 vm_map_entry_t *old_entry_p,
193 vm_map_t new_map);
194
195 void vm_map_region_top_walk(
196 vm_map_entry_t entry,
197 vm_region_top_info_t top);
198
199 void vm_map_region_walk(
200 vm_map_t map,
201 vm_map_offset_t va,
202 vm_map_entry_t entry,
203 vm_object_offset_t offset,
204 vm_object_size_t range,
205 vm_region_extended_info_t extended,
206 boolean_t look_for_pages);
207
208 static kern_return_t vm_map_wire_nested(
209 vm_map_t map,
210 vm_map_offset_t start,
211 vm_map_offset_t end,
212 vm_prot_t access_type,
213 boolean_t user_wire,
214 pmap_t map_pmap,
215 vm_map_offset_t pmap_addr);
216
217 static kern_return_t vm_map_unwire_nested(
218 vm_map_t map,
219 vm_map_offset_t start,
220 vm_map_offset_t end,
221 boolean_t user_wire,
222 pmap_t map_pmap,
223 vm_map_offset_t pmap_addr);
224
225 static kern_return_t vm_map_overwrite_submap_recurse(
226 vm_map_t dst_map,
227 vm_map_offset_t dst_addr,
228 vm_map_size_t dst_size);
229
230 static kern_return_t vm_map_copy_overwrite_nested(
231 vm_map_t dst_map,
232 vm_map_offset_t dst_addr,
233 vm_map_copy_t copy,
234 boolean_t interruptible,
235 pmap_t pmap,
236 boolean_t discard_on_success);
237
238 static kern_return_t vm_map_remap_extract(
239 vm_map_t map,
240 vm_map_offset_t addr,
241 vm_map_size_t size,
242 boolean_t copy,
243 struct vm_map_header *map_header,
244 vm_prot_t *cur_protection,
245 vm_prot_t *max_protection,
246 vm_inherit_t inheritance,
247 boolean_t pageable);
248
249 static kern_return_t vm_map_remap_range_allocate(
250 vm_map_t map,
251 vm_map_address_t *address,
252 vm_map_size_t size,
253 vm_map_offset_t mask,
254 int flags,
255 vm_map_entry_t *map_entry);
256
257 static void vm_map_region_look_for_page(
258 vm_map_t map,
259 vm_map_offset_t va,
260 vm_object_t object,
261 vm_object_offset_t offset,
262 int max_refcnt,
263 int depth,
264 vm_region_extended_info_t extended);
265
266 static int vm_map_region_count_obj_refs(
267 vm_map_entry_t entry,
268 vm_object_t object);
269
270
271 static kern_return_t vm_map_willneed(
272 vm_map_t map,
273 vm_map_offset_t start,
274 vm_map_offset_t end);
275
276 static kern_return_t vm_map_reuse_pages(
277 vm_map_t map,
278 vm_map_offset_t start,
279 vm_map_offset_t end);
280
281 static kern_return_t vm_map_reusable_pages(
282 vm_map_t map,
283 vm_map_offset_t start,
284 vm_map_offset_t end);
285
286 static kern_return_t vm_map_can_reuse(
287 vm_map_t map,
288 vm_map_offset_t start,
289 vm_map_offset_t end);
290
291 #if CONFIG_FREEZE
292 struct default_freezer_table;
293 __private_extern__ void* default_freezer_mapping_create(vm_object_t, vm_offset_t);
294 __private_extern__ void default_freezer_mapping_free(void**, boolean_t all);
295 #endif
296
297 /*
298 * Macros to copy a vm_map_entry. We must be careful to correctly
299 * manage the wired page count. vm_map_entry_copy() creates a new
300 * map entry to the same memory - the wired count in the new entry
301 * must be set to zero. vm_map_entry_copy_full() creates a new
302 * entry that is identical to the old entry. This preserves the
303 * wire count; it's used for map splitting and zone changing in
304 * vm_map_copyout.
305 */
306 #define vm_map_entry_copy(NEW,OLD) \
307 MACRO_BEGIN \
308 boolean_t _vmec_reserved = (NEW)->from_reserved_zone; \
309 *(NEW) = *(OLD); \
310 (NEW)->is_shared = FALSE; \
311 (NEW)->needs_wakeup = FALSE; \
312 (NEW)->in_transition = FALSE; \
313 (NEW)->wired_count = 0; \
314 (NEW)->user_wired_count = 0; \
315 (NEW)->permanent = FALSE; \
316 (NEW)->from_reserved_zone = _vmec_reserved; \
317 MACRO_END
318
319 #define vm_map_entry_copy_full(NEW,OLD) \
320 MACRO_BEGIN \
321 boolean_t _vmecf_reserved = (NEW)->from_reserved_zone; \
322 (*(NEW) = *(OLD)); \
323 (NEW)->from_reserved_zone = _vmecf_reserved; \
324 MACRO_END
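
/*
 * Illustrative sketch (an assumption for documentation, not code used
 * here): fork-style duplication of an entry into a new map uses
 * vm_map_entry_copy() so the copy starts with no wired pages, while
 * vm_map_entry_copy_full() is reserved for relocating the very same
 * entry (e.g. between headers/zones, as in vm_map_copyout) without
 * disturbing its wire count.
 *
 *	new_entry = vm_map_entry_create(new_map, FALSE);
 *	vm_map_entry_copy(new_entry, old_entry);
 */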
325
326 /*
327 * Decide if we want to allow processes to execute from their data or stack areas.
328 * override_nx() returns true if we do. Data/stack execution can be enabled independently
329 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
330 * or allow_stack_exec to enable data execution for that type of data area for that particular
331 * ABI (or both by or'ing the flags together). These are initialized in the architecture
332 * specific pmap files since the default behavior varies according to architecture. The
333 * main reason it varies is because of the need to provide binary compatibility with old
334 * applications that were written before these restrictions came into being. In the old
335 * days, an app could execute anything it could read, but this has slowly been tightened
336 * up over time. The default behavior is:
337 *
338 * 32-bit PPC apps may execute from both stack and data areas
339  * 32-bit Intel apps may execute from data areas but not stack
340 * 64-bit PPC/Intel apps may not execute from either data or stack
341 *
342 * An application on any architecture may override these defaults by explicitly
343 * adding PROT_EXEC permission to the page in question with the mprotect(2)
344 * system call. This code here just determines what happens when an app tries to
345 * execute from a page that lacks execute permission.
346 *
347 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
348 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
349 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
350 * execution from data areas for a particular binary even if the arch normally permits it. As
351 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
352 * to support some complicated use cases, notably browsers with out-of-process plugins that
353 * are not all NX-safe.
354 */
355
356 extern int allow_data_exec, allow_stack_exec;
357
358 int
359 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
360 {
361 int current_abi;
362
363 /*
364 * Determine if the app is running in 32 or 64 bit mode.
365 */
366
367 if (vm_map_is_64bit(map))
368 current_abi = VM_ABI_64;
369 else
370 current_abi = VM_ABI_32;
371
372 /*
373 * Determine if we should allow the execution based on whether it's a
374 * stack or data area and the current architecture.
375 */
376
377 if (user_tag == VM_MEMORY_STACK)
378 return allow_stack_exec & current_abi;
379
380 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
381 }
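
/*
 * Illustrative caller sketch (an assumption -- not part of this file's
 * logic): a fault path that wants to honor these overrides would
 * consult override_nx() with the entry's alias before refusing an
 * instruction fetch from a page that lacks execute permission.
 *
 *	if ((fault_type & VM_PROT_EXECUTE) &&
 *	    !(entry->protection & VM_PROT_EXECUTE) &&
 *	    !override_nx(map, entry->alias)) {
 *		return KERN_PROTECTION_FAILURE;
 *	}
 */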
382
383
384 /*
385 * Virtual memory maps provide for the mapping, protection,
386 * and sharing of virtual memory objects. In addition,
387 * this module provides for an efficient virtual copy of
388 * memory from one map to another.
389 *
390 * Synchronization is required prior to most operations.
391 *
392 * Maps consist of an ordered doubly-linked list of simple
393 * entries; a single hint is used to speed up lookups.
394 *
395 * Sharing maps have been deleted from this version of Mach.
396 * All shared objects are now mapped directly into the respective
397 * maps. This requires a change in the copy on write strategy;
398 * the asymmetric (delayed) strategy is used for shared temporary
399 * objects instead of the symmetric (shadow) strategy. All maps
400 * are now "top level" maps (either task map, kernel map or submap
401 * of the kernel map).
402 *
403  *	Since portions of maps are specified by start/end addresses,
404 * which may not align with existing map entries, all
405 * routines merely "clip" entries to these start/end values.
406 * [That is, an entry is split into two, bordering at a
407 * start or end value.] Note that these clippings may not
408 * always be necessary (as the two resulting entries are then
409 * not changed); however, the clipping is done for convenience.
410 * No attempt is currently made to "glue back together" two
411 * abutting entries.
412 *
413 * The symmetric (shadow) copy strategy implements virtual copy
414 * by copying VM object references from one map to
415 * another, and then marking both regions as copy-on-write.
416 * It is important to note that only one writeable reference
417 * to a VM object region exists in any map when this strategy
418 * is used -- this means that shadow object creation can be
419  *	delayed until a write operation occurs.  The asymmetric (delayed)
420 * strategy allows multiple maps to have writeable references to
421 * the same region of a vm object, and hence cannot delay creating
422 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
423 * Copying of permanent objects is completely different; see
424 * vm_object_copy_strategically() in vm_object.c.
425 */
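
/*
 *	Clipping sketch (illustrative assumption only): a routine operating
 *	on the range [start, end) of a map typically clips the bounding
 *	entries first, so that subsequent per-entry work lines up exactly
 *	with the requested range.  Either clip is a no-op when the boundary
 *	already coincides with an entry boundary.
 *
 *		if (vm_map_lookup_entry(map, start, &entry))
 *			vm_map_clip_start(map, entry, start);
 *		...
 *		vm_map_clip_end(map, entry, end);
 */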
426
427 static zone_t vm_map_zone; /* zone for vm_map structures */
428 static zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
429 static zone_t vm_map_entry_reserved_zone; /* zone with reserve for non-blocking
430 * allocations */
431 static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
432
433
434 /*
435 * Placeholder object for submap operations. This object is dropped
436 * into the range by a call to vm_map_find, and removed when
437 * vm_map_submap creates the submap.
438 */
439
440 vm_object_t vm_submap_object;
441
442 static void *map_data;
443 static vm_size_t map_data_size;
444 static void *kentry_data;
445 static vm_size_t kentry_data_size;
446
447 #if CONFIG_EMBEDDED
448 #define NO_COALESCE_LIMIT 0
449 #else
450 #define NO_COALESCE_LIMIT ((1024 * 128) - 1)
451 #endif
452
453 /* Skip acquiring locks if we're in the midst of a kernel core dump */
454 unsigned int not_in_kdp = 1;
455
456 unsigned int vm_map_set_cache_attr_count = 0;
457
458 kern_return_t
459 vm_map_set_cache_attr(
460 vm_map_t map,
461 vm_map_offset_t va)
462 {
463 vm_map_entry_t map_entry;
464 vm_object_t object;
465 kern_return_t kr = KERN_SUCCESS;
466
467 vm_map_lock_read(map);
468
469 if (!vm_map_lookup_entry(map, va, &map_entry) ||
470 map_entry->is_sub_map) {
471 /*
472 * that memory is not properly mapped
473 */
474 kr = KERN_INVALID_ARGUMENT;
475 goto done;
476 }
477 object = map_entry->object.vm_object;
478
479 if (object == VM_OBJECT_NULL) {
480 /*
481 * there should be a VM object here at this point
482 */
483 kr = KERN_INVALID_ARGUMENT;
484 goto done;
485 }
486 vm_object_lock(object);
487 object->set_cache_attr = TRUE;
488 vm_object_unlock(object);
489
490 vm_map_set_cache_attr_count++;
491 done:
492 vm_map_unlock_read(map);
493
494 return kr;
495 }
496
497
498 #if CONFIG_CODE_DECRYPTION
499 /*
500 * vm_map_apple_protected:
501 * This remaps the requested part of the object with an object backed by
502 * the decrypting pager.
503 * crypt_info contains entry points and session data for the crypt module.
504 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
505 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
506 */
507 kern_return_t
508 vm_map_apple_protected(
509 vm_map_t map,
510 vm_map_offset_t start,
511 vm_map_offset_t end,
512 struct pager_crypt_info *crypt_info)
513 {
514 boolean_t map_locked;
515 kern_return_t kr;
516 vm_map_entry_t map_entry;
517 memory_object_t protected_mem_obj;
518 vm_object_t protected_object;
519 vm_map_offset_t map_addr;
520
521 vm_map_lock_read(map);
522 map_locked = TRUE;
523
524 /* lookup the protected VM object */
525 if (!vm_map_lookup_entry(map,
526 start,
527 &map_entry) ||
528 map_entry->vme_end < end ||
529 map_entry->is_sub_map) {
530 /* that memory is not properly mapped */
531 kr = KERN_INVALID_ARGUMENT;
532 goto done;
533 }
534 protected_object = map_entry->object.vm_object;
535 if (protected_object == VM_OBJECT_NULL) {
536 /* there should be a VM object here at this point */
537 kr = KERN_INVALID_ARGUMENT;
538 goto done;
539 }
540
541 /* make sure protected object stays alive while map is unlocked */
542 vm_object_reference(protected_object);
543
544 vm_map_unlock_read(map);
545 map_locked = FALSE;
546
547 /*
548 * Lookup (and create if necessary) the protected memory object
549 * matching that VM object.
550 * If successful, this also grabs a reference on the memory object,
551 * to guarantee that it doesn't go away before we get a chance to map
552 * it.
553 */
554 protected_mem_obj = apple_protect_pager_setup(protected_object, crypt_info);
555
556 /* release extra ref on protected object */
557 vm_object_deallocate(protected_object);
558
559 if (protected_mem_obj == NULL) {
560 kr = KERN_FAILURE;
561 goto done;
562 }
563
564 /* map this memory object in place of the current one */
565 map_addr = start;
566 kr = vm_map_enter_mem_object(map,
567 &map_addr,
568 end - start,
569 (mach_vm_offset_t) 0,
570 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
571 (ipc_port_t) protected_mem_obj,
572 (map_entry->offset +
573 (start - map_entry->vme_start)),
574 TRUE,
575 map_entry->protection,
576 map_entry->max_protection,
577 map_entry->inheritance);
578 assert(map_addr == start);
579 /*
580 * Release the reference obtained by apple_protect_pager_setup().
581 * The mapping (if it succeeded) is now holding a reference on the
582 * memory object.
583 */
584 memory_object_deallocate(protected_mem_obj);
585
586 done:
587 if (map_locked) {
588 vm_map_unlock_read(map);
589 }
590 return kr;
591 }
592 #endif /* CONFIG_CODE_DECRYPTION */
593
594
595 lck_grp_t vm_map_lck_grp;
596 lck_grp_attr_t vm_map_lck_grp_attr;
597 lck_attr_t vm_map_lck_attr;
598
599
600 /*
601 * vm_map_init:
602 *
603 * Initialize the vm_map module. Must be called before
604 * any other vm_map routines.
605 *
606 * Map and entry structures are allocated from zones -- we must
607 * initialize those zones.
608 *
609 * There are three zones of interest:
610 *
611 * vm_map_zone: used to allocate maps.
612 * vm_map_entry_zone: used to allocate map entries.
613 * vm_map_entry_reserved_zone: fallback zone for kernel map entries
614 *
615 * The kernel allocates map entries from a special zone that is initially
616 * "crammed" with memory. It would be difficult (perhaps impossible) for
617  * the kernel to allocate more memory to an entry zone when it became
618 * empty since the very act of allocating memory implies the creation
619 * of a new entry.
620 */
621 void
622 vm_map_init(
623 void)
624 {
625 vm_size_t entry_zone_alloc_size;
626 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
627 PAGE_SIZE, "maps");
628 zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
629 #if defined(__LP64__)
630 entry_zone_alloc_size = PAGE_SIZE * 5;
631 #else
632 entry_zone_alloc_size = PAGE_SIZE * 6;
633 #endif
634
635 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
636 1024*1024, entry_zone_alloc_size,
637 "VM map entries");
638 zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
639 zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
640
641 vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
642 kentry_data_size * 64, kentry_data_size,
643 "Reserved VM map entries");
644 zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);
645
646 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
647 16*1024, PAGE_SIZE, "VM map copies");
648 zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
649
650 /*
651 * Cram the map and kentry zones with initial data.
652 * Set reserved_zone non-collectible to aid zone_gc().
653 */
654 zone_change(vm_map_zone, Z_COLLECT, FALSE);
655
656 zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
657 zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
658 zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
659 zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
660 zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
661 zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
662
663 zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
664 zcram(vm_map_entry_reserved_zone, (vm_offset_t)kentry_data, kentry_data_size);
665
666 lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
667 lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
668 lck_attr_setdefault(&vm_map_lck_attr);
669 }
670
671 void
672 vm_map_steal_memory(
673 void)
674 {
675 uint32_t kentry_initial_pages;
676
677 map_data_size = round_page(10 * sizeof(struct _vm_map));
678 map_data = pmap_steal_memory(map_data_size);
679
680 /*
681 * kentry_initial_pages corresponds to the number of kernel map entries
682 * required during bootstrap until the asynchronous replenishment
683 * scheme is activated and/or entries are available from the general
684 * map entry pool.
685 */
686 #if defined(__LP64__)
687 kentry_initial_pages = 10;
688 #else
689 kentry_initial_pages = 6;
690 #endif
691 kentry_data_size = kentry_initial_pages * PAGE_SIZE;
692 kentry_data = pmap_steal_memory(kentry_data_size);
693 }
694
695 void vm_kernel_reserved_entry_init(void) {
696 zone_prio_refill_configure(vm_map_entry_reserved_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_entry));
697 }
698
699 /*
700 * vm_map_create:
701 *
702 * Creates and returns a new empty VM map with
703 * the given physical map structure, and having
704 * the given lower and upper address bounds.
705 */
706 vm_map_t
707 vm_map_create(
708 pmap_t pmap,
709 vm_map_offset_t min,
710 vm_map_offset_t max,
711 boolean_t pageable)
712 {
713 static int color_seed = 0;
714 register vm_map_t result;
715
716 result = (vm_map_t) zalloc(vm_map_zone);
717 if (result == VM_MAP_NULL)
718 panic("vm_map_create");
719
720 vm_map_first_entry(result) = vm_map_to_entry(result);
721 vm_map_last_entry(result) = vm_map_to_entry(result);
722 result->hdr.nentries = 0;
723 result->hdr.entries_pageable = pageable;
724
725 vm_map_store_init( &(result->hdr) );
726
727 result->size = 0;
728 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
729 result->user_wire_size = 0;
730 result->ref_count = 1;
731 #if TASK_SWAPPER
732 result->res_count = 1;
733 result->sw_state = MAP_SW_IN;
734 #endif /* TASK_SWAPPER */
735 result->pmap = pmap;
736 result->min_offset = min;
737 result->max_offset = max;
738 result->wiring_required = FALSE;
739 result->no_zero_fill = FALSE;
740 result->mapped = FALSE;
741 result->wait_for_space = FALSE;
742 result->switch_protect = FALSE;
743 result->disable_vmentry_reuse = FALSE;
744 result->map_disallow_data_exec = FALSE;
745 result->highest_entry_end = 0;
746 result->first_free = vm_map_to_entry(result);
747 result->hint = vm_map_to_entry(result);
748 result->color_rr = (color_seed++) & vm_color_mask;
749 result->jit_entry_exists = FALSE;
750 #if CONFIG_FREEZE
751 result->default_freezer_toc = NULL;
752 #endif
753 vm_map_lock_init(result);
754 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
755
756 return(result);
757 }
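
/*
 * Illustrative sketch (an assumption, not code from this file): a task
 * map is typically built by pairing a fresh pmap with the platform's
 * user address bounds, roughly:
 *
 *	new_map = vm_map_create(pmap_create(0, is_64bit),
 *				MACH_VM_MIN_ADDRESS,
 *				MACH_VM_MAX_ADDRESS,
 *				TRUE);
 */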
758
759 /*
760 * vm_map_entry_create: [ internal use only ]
761 *
762 * Allocates a VM map entry for insertion in the
763 * given map (or map copy). No fields are filled.
764 */
765 #define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked)
766
767 #define vm_map_copy_entry_create(copy, map_locked) \
768 _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
769 unsigned reserved_zalloc_count, nonreserved_zalloc_count;
770
771 static vm_map_entry_t
772 _vm_map_entry_create(
773 struct vm_map_header *map_header, boolean_t __unused map_locked)
774 {
775 zone_t zone;
776 vm_map_entry_t entry;
777
778 zone = vm_map_entry_zone;
779
780 assert(map_header->entries_pageable ? !map_locked : TRUE);
781
782 if (map_header->entries_pageable) {
783 entry = (vm_map_entry_t) zalloc(zone);
784 }
785 else {
786 entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);
787
788 if (entry == VM_MAP_ENTRY_NULL) {
789 zone = vm_map_entry_reserved_zone;
790 entry = (vm_map_entry_t) zalloc(zone);
791 OSAddAtomic(1, &reserved_zalloc_count);
792 } else
793 OSAddAtomic(1, &nonreserved_zalloc_count);
794 }
795
796 if (entry == VM_MAP_ENTRY_NULL)
797 panic("vm_map_entry_create");
798 entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
799
800 vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
801
802 return(entry);
803 }
804
805 /*
806 * vm_map_entry_dispose: [ internal use only ]
807 *
808 * Inverse of vm_map_entry_create.
809 *
810 * write map lock held so no need to
811  *	do anything special to ensure correctness
812 * of the stores
813 */
814 #define vm_map_entry_dispose(map, entry) \
815 vm_map_store_update( map, entry, VM_MAP_ENTRY_DELETE); \
816 _vm_map_entry_dispose(&(map)->hdr, (entry))
817
818 #define vm_map_copy_entry_dispose(map, entry) \
819 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
820
821 static void
822 _vm_map_entry_dispose(
823 register struct vm_map_header *map_header,
824 register vm_map_entry_t entry)
825 {
826 register zone_t zone;
827
828 if (map_header->entries_pageable || !(entry->from_reserved_zone))
829 zone = vm_map_entry_zone;
830 else
831 zone = vm_map_entry_reserved_zone;
832
833 if (!map_header->entries_pageable) {
834 if (zone == vm_map_entry_zone)
835 OSAddAtomic(-1, &nonreserved_zalloc_count);
836 else
837 OSAddAtomic(-1, &reserved_zalloc_count);
838 }
839
840 zfree(zone, entry);
841 }
842
843 #if MACH_ASSERT
844 static boolean_t first_free_check = FALSE;
845 boolean_t
846 first_free_is_valid(
847 vm_map_t map)
848 {
849 if (!first_free_check)
850 return TRUE;
851
852 return( first_free_is_valid_store( map ));
853 }
854 #endif /* MACH_ASSERT */
855
856
857 #define vm_map_copy_entry_link(copy, after_where, entry) \
858 _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
859
860 #define vm_map_copy_entry_unlink(copy, entry) \
861 _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
862
863 #if MACH_ASSERT && TASK_SWAPPER
864 /*
865 * vm_map_res_reference:
866 *
867 * Adds another valid residence count to the given map.
868 *
869 * Map is locked so this function can be called from
870 * vm_map_swapin.
871 *
872 */
873 void vm_map_res_reference(register vm_map_t map)
874 {
875 /* assert map is locked */
876 assert(map->res_count >= 0);
877 assert(map->ref_count >= map->res_count);
878 if (map->res_count == 0) {
879 lck_mtx_unlock(&map->s_lock);
880 vm_map_lock(map);
881 vm_map_swapin(map);
882 lck_mtx_lock(&map->s_lock);
883 ++map->res_count;
884 vm_map_unlock(map);
885 } else
886 ++map->res_count;
887 }
888
889 /*
890 * vm_map_reference_swap:
891 *
892 * Adds valid reference and residence counts to the given map.
893 *
894 * The map may not be in memory (i.e. zero residence count).
895 *
896 */
897 void vm_map_reference_swap(register vm_map_t map)
898 {
899 assert(map != VM_MAP_NULL);
900 lck_mtx_lock(&map->s_lock);
901 assert(map->res_count >= 0);
902 assert(map->ref_count >= map->res_count);
903 map->ref_count++;
904 vm_map_res_reference(map);
905 lck_mtx_unlock(&map->s_lock);
906 }
907
908 /*
909 * vm_map_res_deallocate:
910 *
911 * Decrement residence count on a map; possibly causing swapout.
912 *
913 * The map must be in memory (i.e. non-zero residence count).
914 *
915 * The map is locked, so this function is callable from vm_map_deallocate.
916 *
917 */
918 void vm_map_res_deallocate(register vm_map_t map)
919 {
920 assert(map->res_count > 0);
921 if (--map->res_count == 0) {
922 lck_mtx_unlock(&map->s_lock);
923 vm_map_lock(map);
924 vm_map_swapout(map);
925 vm_map_unlock(map);
926 lck_mtx_lock(&map->s_lock);
927 }
928 assert(map->ref_count >= map->res_count);
929 }
930 #endif /* MACH_ASSERT && TASK_SWAPPER */
931
932 /*
933 * vm_map_destroy:
934 *
935 * Actually destroy a map.
936 */
937 void
938 vm_map_destroy(
939 vm_map_t map,
940 int flags)
941 {
942 vm_map_lock(map);
943
944 /* clean up regular map entries */
945 (void) vm_map_delete(map, map->min_offset, map->max_offset,
946 flags, VM_MAP_NULL);
947 /* clean up leftover special mappings (commpage, etc...) */
948 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
949 flags, VM_MAP_NULL);
950
951 #if CONFIG_FREEZE
952 if (map->default_freezer_toc){
953 default_freezer_mapping_free( &(map->default_freezer_toc), TRUE);
954 }
955 #endif
956 vm_map_unlock(map);
957
958 assert(map->hdr.nentries == 0);
959
960 if(map->pmap)
961 pmap_destroy(map->pmap);
962
963 zfree(vm_map_zone, map);
964 }
965
966 #if TASK_SWAPPER
967 /*
968 * vm_map_swapin/vm_map_swapout
969 *
970 * Swap a map in and out, either referencing or releasing its resources.
971  *	These functions are for internal use only; however, they must be exported
972 * because they may be called from macros, which are exported.
973 *
974 * In the case of swapout, there could be races on the residence count,
975 * so if the residence count is up, we return, assuming that a
976 * vm_map_deallocate() call in the near future will bring us back.
977 *
978 * Locking:
979 * -- We use the map write lock for synchronization among races.
980 * -- The map write lock, and not the simple s_lock, protects the
981 * swap state of the map.
982 * -- If a map entry is a share map, then we hold both locks, in
983 * hierarchical order.
984 *
985 * Synchronization Notes:
986 * 1) If a vm_map_swapin() call happens while swapout in progress, it
987 * will block on the map lock and proceed when swapout is through.
988 * 2) A vm_map_reference() call at this time is illegal, and will
989 * cause a panic. vm_map_reference() is only allowed on resident
990 * maps, since it refuses to block.
991 * 3) A vm_map_swapin() call during a swapin will block, and
992  *	proceed when the first swapin is done, turning into a nop.
993 * This is the reason the res_count is not incremented until
994 * after the swapin is complete.
995 * 4) There is a timing hole after the checks of the res_count, before
996 * the map lock is taken, during which a swapin may get the lock
997 * before a swapout about to happen. If this happens, the swapin
998 * will detect the state and increment the reference count, causing
999 * the swapout to be a nop, thereby delaying it until a later
1000 * vm_map_deallocate. If the swapout gets the lock first, then
1001 * the swapin will simply block until the swapout is done, and
1002 * then proceed.
1003 *
1004 * Because vm_map_swapin() is potentially an expensive operation, it
1005 * should be used with caution.
1006 *
1007 * Invariants:
1008 * 1) A map with a residence count of zero is either swapped, or
1009 * being swapped.
1010 * 2) A map with a non-zero residence count is either resident,
1011 * or being swapped in.
1012 */
1013
1014 int vm_map_swap_enable = 1;
1015
1016 void vm_map_swapin (vm_map_t map)
1017 {
1018 register vm_map_entry_t entry;
1019
1020 if (!vm_map_swap_enable) /* debug */
1021 return;
1022
1023 /*
1024 * Map is locked
1025 * First deal with various races.
1026 */
1027 if (map->sw_state == MAP_SW_IN)
1028 /*
1029 * we raced with swapout and won. Returning will incr.
1030 * the res_count, turning the swapout into a nop.
1031 */
1032 return;
1033
1034 /*
1035 * The residence count must be zero. If we raced with another
1036 * swapin, the state would have been IN; if we raced with a
1037 * swapout (after another competing swapin), we must have lost
1038 * the race to get here (see above comment), in which case
1039 * res_count is still 0.
1040 */
1041 assert(map->res_count == 0);
1042
1043 /*
1044 * There are no intermediate states of a map going out or
1045 * coming in, since the map is locked during the transition.
1046 */
1047 assert(map->sw_state == MAP_SW_OUT);
1048
1049 /*
1050 * We now operate upon each map entry. If the entry is a sub-
1051 * or share-map, we call vm_map_res_reference upon it.
1052 * If the entry is an object, we call vm_object_res_reference
1053 * (this may iterate through the shadow chain).
1054 * Note that we hold the map locked the entire time,
1055 * even if we get back here via a recursive call in
1056 * vm_map_res_reference.
1057 */
1058 entry = vm_map_first_entry(map);
1059
1060 while (entry != vm_map_to_entry(map)) {
1061 if (entry->object.vm_object != VM_OBJECT_NULL) {
1062 if (entry->is_sub_map) {
1063 vm_map_t lmap = entry->object.sub_map;
1064 lck_mtx_lock(&lmap->s_lock);
1065 vm_map_res_reference(lmap);
1066 lck_mtx_unlock(&lmap->s_lock);
1067 } else {
1068 vm_object_t object = entry->object.vm_object;
1069 vm_object_lock(object);
1070 /*
1071 * This call may iterate through the
1072 * shadow chain.
1073 */
1074 vm_object_res_reference(object);
1075 vm_object_unlock(object);
1076 }
1077 }
1078 entry = entry->vme_next;
1079 }
1080 assert(map->sw_state == MAP_SW_OUT);
1081 map->sw_state = MAP_SW_IN;
1082 }
1083
1084 void vm_map_swapout(vm_map_t map)
1085 {
1086 register vm_map_entry_t entry;
1087
1088 /*
1089 * Map is locked
1090 * First deal with various races.
1091 * If we raced with a swapin and lost, the residence count
1092 * will have been incremented to 1, and we simply return.
1093 */
1094 lck_mtx_lock(&map->s_lock);
1095 if (map->res_count != 0) {
1096 lck_mtx_unlock(&map->s_lock);
1097 return;
1098 }
1099 lck_mtx_unlock(&map->s_lock);
1100
1101 /*
1102 * There are no intermediate states of a map going out or
1103 * coming in, since the map is locked during the transition.
1104 */
1105 assert(map->sw_state == MAP_SW_IN);
1106
1107 if (!vm_map_swap_enable)
1108 return;
1109
1110 /*
1111 * We now operate upon each map entry. If the entry is a sub-
1112 * or share-map, we call vm_map_res_deallocate upon it.
1113 * If the entry is an object, we call vm_object_res_deallocate
1114 * (this may iterate through the shadow chain).
1115 * Note that we hold the map locked the entire time,
1116 * even if we get back here via a recursive call in
1117 * vm_map_res_deallocate.
1118 */
1119 entry = vm_map_first_entry(map);
1120
1121 while (entry != vm_map_to_entry(map)) {
1122 if (entry->object.vm_object != VM_OBJECT_NULL) {
1123 if (entry->is_sub_map) {
1124 vm_map_t lmap = entry->object.sub_map;
1125 lck_mtx_lock(&lmap->s_lock);
1126 vm_map_res_deallocate(lmap);
1127 lck_mtx_unlock(&lmap->s_lock);
1128 } else {
1129 vm_object_t object = entry->object.vm_object;
1130 vm_object_lock(object);
1131 /*
1132 * This call may take a long time,
1133 * since it could actively push
1134 * out pages (if we implement it
1135 * that way).
1136 */
1137 vm_object_res_deallocate(object);
1138 vm_object_unlock(object);
1139 }
1140 }
1141 entry = entry->vme_next;
1142 }
1143 assert(map->sw_state == MAP_SW_IN);
1144 map->sw_state = MAP_SW_OUT;
1145 }
1146
1147 #endif /* TASK_SWAPPER */
1148
1149 /*
1150 * vm_map_lookup_entry: [ internal use only ]
1151 *
1152 * Calls into the vm map store layer to find the map
1153 * entry containing (or immediately preceding) the
1154 * specified address in the given map; the entry is returned
1155 * in the "entry" parameter. The boolean
1156 * result indicates whether the address is
1157 * actually contained in the map.
1158 */
1159 boolean_t
1160 vm_map_lookup_entry(
1161 register vm_map_t map,
1162 register vm_map_offset_t address,
1163 vm_map_entry_t *entry) /* OUT */
1164 {
1165 return ( vm_map_store_lookup_entry( map, address, entry ));
1166 }
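
/*
 * Illustrative caller sketch (an assumption): the lookup is only
 * meaningful while at least the read lock is held, and on a FALSE
 * result "entry" is the entry immediately preceding the address (or
 * vm_map_to_entry(map) if there is none).
 *
 *	vm_map_entry_t	entry;
 *
 *	vm_map_lock_read(map);
 *	if (vm_map_lookup_entry(map, addr, &entry)) {
 *		... addr lies in [entry->vme_start, entry->vme_end) ...
 *	}
 *	vm_map_unlock_read(map);
 */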
1167
1168 /*
1169 * Routine: vm_map_find_space
1170 * Purpose:
1171 * Allocate a range in the specified virtual address map,
1172 * returning the entry allocated for that range.
1173 * Used by kmem_alloc, etc.
1174 *
1175  *	The map must NOT be locked. It will be returned locked
1176 * on KERN_SUCCESS, unlocked on failure.
1177 *
1178 * If an entry is allocated, the object/offset fields
1179 * are initialized to zero.
1180 */
1181 kern_return_t
1182 vm_map_find_space(
1183 register vm_map_t map,
1184 vm_map_offset_t *address, /* OUT */
1185 vm_map_size_t size,
1186 vm_map_offset_t mask,
1187 int flags,
1188 vm_map_entry_t *o_entry) /* OUT */
1189 {
1190 register vm_map_entry_t entry, new_entry;
1191 register vm_map_offset_t start;
1192 register vm_map_offset_t end;
1193
1194 if (size == 0) {
1195 *address = 0;
1196 return KERN_INVALID_ARGUMENT;
1197 }
1198
1199 if (flags & VM_FLAGS_GUARD_AFTER) {
1200 /* account for the back guard page in the size */
1201 size += PAGE_SIZE_64;
1202 }
1203
1204 new_entry = vm_map_entry_create(map, FALSE);
1205
1206 /*
1207 * Look for the first possible address; if there's already
1208 * something at this address, we have to start after it.
1209 */
1210
1211 vm_map_lock(map);
1212
1213 if( map->disable_vmentry_reuse == TRUE) {
1214 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1215 } else {
1216 assert(first_free_is_valid(map));
1217 if ((entry = map->first_free) == vm_map_to_entry(map))
1218 start = map->min_offset;
1219 else
1220 start = entry->vme_end;
1221 }
1222
1223 /*
1224 * In any case, the "entry" always precedes
1225 * the proposed new region throughout the loop:
1226 */
1227
1228 while (TRUE) {
1229 register vm_map_entry_t next;
1230
1231 /*
1232 * Find the end of the proposed new region.
1233 * Be sure we didn't go beyond the end, or
1234 * wrap around the address.
1235 */
1236
1237 if (flags & VM_FLAGS_GUARD_BEFORE) {
1238 /* reserve space for the front guard page */
1239 start += PAGE_SIZE_64;
1240 }
1241 end = ((start + mask) & ~mask);
1242
1243 if (end < start) {
1244 vm_map_entry_dispose(map, new_entry);
1245 vm_map_unlock(map);
1246 return(KERN_NO_SPACE);
1247 }
1248 start = end;
1249 end += size;
1250
1251 if ((end > map->max_offset) || (end < start)) {
1252 vm_map_entry_dispose(map, new_entry);
1253 vm_map_unlock(map);
1254 return(KERN_NO_SPACE);
1255 }
1256
1257 /*
1258 * If there are no more entries, we must win.
1259 */
1260
1261 next = entry->vme_next;
1262 if (next == vm_map_to_entry(map))
1263 break;
1264
1265 /*
1266 * If there is another entry, it must be
1267 * after the end of the potential new region.
1268 */
1269
1270 if (next->vme_start >= end)
1271 break;
1272
1273 /*
1274 * Didn't fit -- move to the next entry.
1275 */
1276
1277 entry = next;
1278 start = entry->vme_end;
1279 }
1280
1281 /*
1282 * At this point,
1283 * "start" and "end" should define the endpoints of the
1284 * available new range, and
1285 * "entry" should refer to the region before the new
1286 * range, and
1287 *
1288 * the map should be locked.
1289 */
1290
1291 if (flags & VM_FLAGS_GUARD_BEFORE) {
1292 /* go back for the front guard page */
1293 start -= PAGE_SIZE_64;
1294 }
1295 *address = start;
1296
1297 assert(start < end);
1298 new_entry->vme_start = start;
1299 new_entry->vme_end = end;
1300 assert(page_aligned(new_entry->vme_start));
1301 assert(page_aligned(new_entry->vme_end));
1302
1303 new_entry->is_shared = FALSE;
1304 new_entry->is_sub_map = FALSE;
1305 new_entry->use_pmap = FALSE;
1306 new_entry->object.vm_object = VM_OBJECT_NULL;
1307 new_entry->offset = (vm_object_offset_t) 0;
1308
1309 new_entry->needs_copy = FALSE;
1310
1311 new_entry->inheritance = VM_INHERIT_DEFAULT;
1312 new_entry->protection = VM_PROT_DEFAULT;
1313 new_entry->max_protection = VM_PROT_ALL;
1314 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1315 new_entry->wired_count = 0;
1316 new_entry->user_wired_count = 0;
1317
1318 new_entry->in_transition = FALSE;
1319 new_entry->needs_wakeup = FALSE;
1320 new_entry->no_cache = FALSE;
1321 new_entry->permanent = FALSE;
1322 new_entry->superpage_size = 0;
1323
1324 new_entry->alias = 0;
1325 new_entry->zero_wired_pages = FALSE;
1326
1327 VM_GET_FLAGS_ALIAS(flags, new_entry->alias);
1328
1329 /*
1330 * Insert the new entry into the list
1331 */
1332
1333 vm_map_store_entry_link(map, entry, new_entry);
1334
1335 map->size += size;
1336
1337 /*
1338 * Update the lookup hint
1339 */
1340 SAVE_HINT_MAP_WRITE(map, new_entry);
1341
1342 *o_entry = new_entry;
1343 return(KERN_SUCCESS);
1344 }
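
/*
 * Illustrative sketch of the calling convention (an assumption, in the
 * spirit of kmem_alloc): on KERN_SUCCESS the map comes back locked and
 * the new entry's object/offset are zeroed, so the caller installs its
 * object and then unlocks.
 *
 *	vm_map_offset_t	addr;
 *	vm_map_entry_t	entry;
 *
 *	if (vm_map_find_space(kernel_map, &addr, size, 0, 0, &entry)
 *	    == KERN_SUCCESS) {
 *		entry->object.vm_object = object;
 *		entry->offset = (vm_object_offset_t) 0;
 *		vm_map_unlock(kernel_map);
 *	}
 */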
1345
1346 int vm_map_pmap_enter_print = FALSE;
1347 int vm_map_pmap_enter_enable = FALSE;
1348
1349 /*
1350 * Routine: vm_map_pmap_enter [internal only]
1351 *
1352 * Description:
1353 * Force pages from the specified object to be entered into
1354 * the pmap at the specified address if they are present.
1355  *		As soon as a page is not found in the object, the scan ends.
1356 *
1357 * Returns:
1358 * Nothing.
1359 *
1360 * In/out conditions:
1361 * The source map should not be locked on entry.
1362 */
1363 static void
1364 vm_map_pmap_enter(
1365 vm_map_t map,
1366 register vm_map_offset_t addr,
1367 register vm_map_offset_t end_addr,
1368 register vm_object_t object,
1369 vm_object_offset_t offset,
1370 vm_prot_t protection)
1371 {
1372 int type_of_fault;
1373 kern_return_t kr;
1374
1375 if(map->pmap == 0)
1376 return;
1377
1378 while (addr < end_addr) {
1379 register vm_page_t m;
1380
1381 vm_object_lock(object);
1382
1383 m = vm_page_lookup(object, offset);
1384 /*
1385 * ENCRYPTED SWAP:
1386 * The user should never see encrypted data, so do not
1387 * enter an encrypted page in the page table.
1388 */
1389 if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
1390 m->fictitious ||
1391 (m->unusual && ( m->error || m->restart || m->absent))) {
1392 vm_object_unlock(object);
1393 return;
1394 }
1395
1396 if (vm_map_pmap_enter_print) {
1397 printf("vm_map_pmap_enter:");
1398 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1399 map, (unsigned long long)addr, object, (unsigned long long)offset);
1400 }
1401 type_of_fault = DBG_CACHE_HIT_FAULT;
1402 kr = vm_fault_enter(m, map->pmap, addr, protection, protection,
1403 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
1404 &type_of_fault);
1405
1406 vm_object_unlock(object);
1407
1408 offset += PAGE_SIZE_64;
1409 addr += PAGE_SIZE;
1410 }
1411 }
1412
1413 boolean_t vm_map_pmap_is_empty(
1414 vm_map_t map,
1415 vm_map_offset_t start,
1416 vm_map_offset_t end);
1417 boolean_t vm_map_pmap_is_empty(
1418 vm_map_t map,
1419 vm_map_offset_t start,
1420 vm_map_offset_t end)
1421 {
1422 #ifdef MACHINE_PMAP_IS_EMPTY
1423 return pmap_is_empty(map->pmap, start, end);
1424 #else /* MACHINE_PMAP_IS_EMPTY */
1425 vm_map_offset_t offset;
1426 ppnum_t phys_page;
1427
1428 if (map->pmap == NULL) {
1429 return TRUE;
1430 }
1431
1432 for (offset = start;
1433 offset < end;
1434 offset += PAGE_SIZE) {
1435 phys_page = pmap_find_phys(map->pmap, offset);
1436 if (phys_page) {
1437 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1438 "page %d at 0x%llx\n",
1439 map, (long long)start, (long long)end,
1440 phys_page, (long long)offset);
1441 return FALSE;
1442 }
1443 }
1444 return TRUE;
1445 #endif /* MACHINE_PMAP_IS_EMPTY */
1446 }
1447
1448 /*
1449 * Routine: vm_map_enter
1450 *
1451 * Description:
1452 * Allocate a range in the specified virtual address map.
1453 * The resulting range will refer to memory defined by
1454 * the given memory object and offset into that object.
1455 *
1456 * Arguments are as defined in the vm_map call.
1457 */
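
/*
 * Illustrative caller sketch (an assumption, for documentation only):
 * an anonymous, pageable allocation placed anywhere in the map, which
 * is essentially what vm_allocate() boils down to.
 *
 *	vm_map_offset_t	map_addr = 0;
 *
 *	kr = vm_map_enter(map, &map_addr, size, (vm_map_offset_t) 0,
 *			  VM_FLAGS_ANYWHERE,
 *			  VM_OBJECT_NULL, (vm_object_offset_t) 0, FALSE,
 *			  VM_PROT_DEFAULT, VM_PROT_ALL,
 *			  VM_INHERIT_DEFAULT);
 */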
1458 int _map_enter_debug = 0;
1459 static unsigned int vm_map_enter_restore_successes = 0;
1460 static unsigned int vm_map_enter_restore_failures = 0;
1461 kern_return_t
1462 vm_map_enter(
1463 vm_map_t map,
1464 vm_map_offset_t *address, /* IN/OUT */
1465 vm_map_size_t size,
1466 vm_map_offset_t mask,
1467 int flags,
1468 vm_object_t object,
1469 vm_object_offset_t offset,
1470 boolean_t needs_copy,
1471 vm_prot_t cur_protection,
1472 vm_prot_t max_protection,
1473 vm_inherit_t inheritance)
1474 {
1475 vm_map_entry_t entry, new_entry;
1476 vm_map_offset_t start, tmp_start, tmp_offset;
1477 vm_map_offset_t end, tmp_end;
1478 vm_map_offset_t tmp2_start, tmp2_end;
1479 vm_map_offset_t step;
1480 kern_return_t result = KERN_SUCCESS;
1481 vm_map_t zap_old_map = VM_MAP_NULL;
1482 vm_map_t zap_new_map = VM_MAP_NULL;
1483 boolean_t map_locked = FALSE;
1484 boolean_t pmap_empty = TRUE;
1485 boolean_t new_mapping_established = FALSE;
1486 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1487 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1488 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
1489 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
1490 boolean_t is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
1491 boolean_t permanent = ((flags & VM_FLAGS_PERMANENT) != 0);
1492 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
1493 char alias;
1494 vm_map_offset_t effective_min_offset, effective_max_offset;
1495 kern_return_t kr;
1496
1497 if (superpage_size) {
1498 switch (superpage_size) {
1499 /*
1500 * Note that the current implementation only supports
1501 * a single size for superpages, SUPERPAGE_SIZE, per
1502                   * architecture. Once more sizes need to be supported,
1503                   * SUPERPAGE_SIZE will have to be replaced with a lookup
1504                   * of the size based on superpage_size.
1505 */
1506 #ifdef __x86_64__
1507 case SUPERPAGE_SIZE_ANY:
1508 /* handle it like 2 MB and round up to page size */
1509 size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
1510 case SUPERPAGE_SIZE_2MB:
1511 break;
1512 #endif
1513 default:
1514 return KERN_INVALID_ARGUMENT;
1515 }
1516 mask = SUPERPAGE_SIZE-1;
1517 if (size & (SUPERPAGE_SIZE-1))
1518 return KERN_INVALID_ARGUMENT;
1519 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
1520 }
1521
1522
1523 #if CONFIG_EMBEDDED
1524 if (cur_protection & VM_PROT_WRITE){
1525 if ((cur_protection & VM_PROT_EXECUTE) && !(flags & VM_FLAGS_MAP_JIT)){
1526 printf("EMBEDDED: %s curprot cannot be write+execute. turning off execute\n", __PRETTY_FUNCTION__);
1527 cur_protection &= ~VM_PROT_EXECUTE;
1528 }
1529 }
1530 #endif /* CONFIG_EMBEDDED */
1531
1532 if (is_submap) {
1533 if (purgable) {
1534 /* submaps can not be purgeable */
1535 return KERN_INVALID_ARGUMENT;
1536 }
1537 if (object == VM_OBJECT_NULL) {
1538 /* submaps can not be created lazily */
1539 return KERN_INVALID_ARGUMENT;
1540 }
1541 }
1542 if (flags & VM_FLAGS_ALREADY) {
1543 /*
1544 * VM_FLAGS_ALREADY says that it's OK if the same mapping
1545                  * is already present.  For it to be meaningful, the requested
1546                  * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
1547                  * we shouldn't try to remove what was mapped there first
1548 * (!VM_FLAGS_OVERWRITE).
1549 */
1550 if ((flags & VM_FLAGS_ANYWHERE) ||
1551 (flags & VM_FLAGS_OVERWRITE)) {
1552 return KERN_INVALID_ARGUMENT;
1553 }
1554 }
1555
1556 effective_min_offset = map->min_offset;
1557
1558 if (flags & VM_FLAGS_BEYOND_MAX) {
1559 /*
1560 * Allow an insertion beyond the map's max offset.
1561 */
1562 if (vm_map_is_64bit(map))
1563 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
1564 else
1565 effective_max_offset = 0x00000000FFFFF000ULL;
1566 } else {
1567 effective_max_offset = map->max_offset;
1568 }
1569
1570 if (size == 0 ||
1571 (offset & PAGE_MASK_64) != 0) {
1572 *address = 0;
1573 return KERN_INVALID_ARGUMENT;
1574 }
1575
1576 VM_GET_FLAGS_ALIAS(flags, alias);
1577
1578 #define RETURN(value) { result = value; goto BailOut; }
1579
1580 assert(page_aligned(*address));
1581 assert(page_aligned(size));
1582
1583 /*
1584 * Only zero-fill objects are allowed to be purgable.
1585 * LP64todo - limit purgable objects to 32-bits for now
1586 */
1587 if (purgable &&
1588 (offset != 0 ||
1589 (object != VM_OBJECT_NULL &&
1590 (object->vo_size != size ||
1591 object->purgable == VM_PURGABLE_DENY))
1592 || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
1593 return KERN_INVALID_ARGUMENT;
1594
1595 if (!anywhere && overwrite) {
1596 /*
1597 * Create a temporary VM map to hold the old mappings in the
1598 * affected area while we create the new one.
1599 * This avoids releasing the VM map lock in
1600 * vm_map_entry_delete() and allows atomicity
1601 * when we want to replace some mappings with a new one.
1602 * It also allows us to restore the old VM mappings if the
1603 * new mapping fails.
1604 */
1605 zap_old_map = vm_map_create(PMAP_NULL,
1606 *address,
1607 *address + size,
1608 map->hdr.entries_pageable);
1609 }
1610
1611 StartAgain: ;
1612
1613 start = *address;
1614
1615 if (anywhere) {
1616 vm_map_lock(map);
1617 map_locked = TRUE;
1618
1619 if ((flags & VM_FLAGS_MAP_JIT) && (map->jit_entry_exists)){
1620 result = KERN_INVALID_ARGUMENT;
1621 goto BailOut;
1622 }
1623
1624 /*
1625 * Calculate the first possible address.
1626 */
1627
1628 if (start < effective_min_offset)
1629 start = effective_min_offset;
1630 if (start > effective_max_offset)
1631 RETURN(KERN_NO_SPACE);
1632
1633 /*
1634 * Look for the first possible address;
1635 * if there's already something at this
1636 * address, we have to start after it.
1637 */
1638
1639 if( map->disable_vmentry_reuse == TRUE) {
1640 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1641 } else {
1642 assert(first_free_is_valid(map));
1643
1644 entry = map->first_free;
1645
1646 if (entry == vm_map_to_entry(map)) {
1647 entry = NULL;
1648 } else {
1649 if (entry->vme_next == vm_map_to_entry(map)){
1650 /*
1651 * Hole at the end of the map.
1652 */
1653 entry = NULL;
1654 } else {
1655 if (start < (entry->vme_next)->vme_start ) {
1656 start = entry->vme_end;
1657 } else {
1658 /*
1659 * Need to do a lookup.
1660 */
1661 entry = NULL;
1662 }
1663 }
1664 }
1665
1666 if (entry == NULL) {
1667 vm_map_entry_t tmp_entry;
1668 if (vm_map_lookup_entry(map, start, &tmp_entry))
1669 start = tmp_entry->vme_end;
1670 entry = tmp_entry;
1671 }
1672 }
1673
1674 /*
1675 * In any case, the "entry" always precedes
1676 * the proposed new region throughout the
1677 * loop:
1678 */
1679
1680 while (TRUE) {
1681 register vm_map_entry_t next;
1682
1683 /*
1684 * Find the end of the proposed new region.
1685 * Be sure we didn't go beyond the end, or
1686 * wrap around the address.
1687 */
1688
1689 end = ((start + mask) & ~mask);
1690 if (end < start)
1691 RETURN(KERN_NO_SPACE);
1692 start = end;
1693 end += size;
1694
1695 if ((end > effective_max_offset) || (end < start)) {
1696 if (map->wait_for_space) {
1697 if (size <= (effective_max_offset -
1698 effective_min_offset)) {
1699 assert_wait((event_t)map,
1700 THREAD_ABORTSAFE);
1701 vm_map_unlock(map);
1702 map_locked = FALSE;
1703 thread_block(THREAD_CONTINUE_NULL);
1704 goto StartAgain;
1705 }
1706 }
1707 RETURN(KERN_NO_SPACE);
1708 }
1709
1710 /*
1711 * If there are no more entries, we must win.
1712 */
1713
1714 next = entry->vme_next;
1715 if (next == vm_map_to_entry(map))
1716 break;
1717
1718 /*
1719 * If there is another entry, it must be
1720 * after the end of the potential new region.
1721 */
1722
1723 if (next->vme_start >= end)
1724 break;
1725
1726 /*
1727 * Didn't fit -- move to the next entry.
1728 */
1729
1730 entry = next;
1731 start = entry->vme_end;
1732 }
1733 *address = start;
1734 } else {
1735 /*
1736 * Verify that:
1737 * the address doesn't itself violate
1738 * the mask requirement.
1739 */
1740
1741 vm_map_lock(map);
1742 map_locked = TRUE;
1743 if ((start & mask) != 0)
1744 RETURN(KERN_NO_SPACE);
1745
1746 /*
1747 * ... the address is within bounds
1748 */
1749
1750 end = start + size;
1751
1752 if ((start < effective_min_offset) ||
1753 (end > effective_max_offset) ||
1754 (start >= end)) {
1755 RETURN(KERN_INVALID_ADDRESS);
1756 }
1757
1758 if (overwrite && zap_old_map != VM_MAP_NULL) {
1759 /*
1760 * Fixed mapping and "overwrite" flag: attempt to
1761 * remove all existing mappings in the specified
1762 * address range, saving them in our "zap_old_map".
1763 */
1764 (void) vm_map_delete(map, start, end,
1765 VM_MAP_REMOVE_SAVE_ENTRIES,
1766 zap_old_map);
1767 }
1768
1769 /*
1770 * ... the starting address isn't allocated
1771 */
1772
1773 if (vm_map_lookup_entry(map, start, &entry)) {
1774 if (! (flags & VM_FLAGS_ALREADY)) {
1775 RETURN(KERN_NO_SPACE);
1776 }
1777 /*
1778 * Check if what's already there is what we want.
1779 */
1780 tmp_start = start;
1781 tmp_offset = offset;
1782 if (entry->vme_start < start) {
1783 tmp_start -= start - entry->vme_start;
1784 tmp_offset -= start - entry->vme_start;
1785
1786 }
1787 for (; entry->vme_start < end;
1788 entry = entry->vme_next) {
1789 /*
1790 * Check if the mapping's attributes
1791 * match the existing map entry.
1792 */
1793 if (entry == vm_map_to_entry(map) ||
1794 entry->vme_start != tmp_start ||
1795 entry->is_sub_map != is_submap ||
1796 entry->offset != tmp_offset ||
1797 entry->needs_copy != needs_copy ||
1798 entry->protection != cur_protection ||
1799 entry->max_protection != max_protection ||
1800 entry->inheritance != inheritance ||
1801 entry->alias != alias) {
1802 /* not the same mapping ! */
1803 RETURN(KERN_NO_SPACE);
1804 }
1805 /*
1806 * Check if the same object is being mapped.
1807 */
1808 if (is_submap) {
1809 if (entry->object.sub_map !=
1810 (vm_map_t) object) {
1811 /* not the same submap */
1812 RETURN(KERN_NO_SPACE);
1813 }
1814 } else {
1815 if (entry->object.vm_object != object) {
1816 /* not the same VM object... */
1817 vm_object_t obj2;
1818
1819 obj2 = entry->object.vm_object;
1820 if ((obj2 == VM_OBJECT_NULL ||
1821 obj2->internal) &&
1822 (object == VM_OBJECT_NULL ||
1823 object->internal)) {
1824 /*
1825 * ... but both are
1826 * anonymous memory,
1827 * so equivalent.
1828 */
1829 } else {
1830 RETURN(KERN_NO_SPACE);
1831 }
1832 }
1833 }
1834
1835 tmp_offset += entry->vme_end - entry->vme_start;
1836 tmp_start += entry->vme_end - entry->vme_start;
1837 if (entry->vme_end >= end) {
1838 /* reached the end of our mapping */
1839 break;
1840 }
1841 }
1842 /* it all matches: let's use what's already there ! */
1843 RETURN(KERN_MEMORY_PRESENT);
1844 }
1845
1846 /*
1847 * ... the next region doesn't overlap the
1848 * end point.
1849 */
1850
1851 if ((entry->vme_next != vm_map_to_entry(map)) &&
1852 (entry->vme_next->vme_start < end))
1853 RETURN(KERN_NO_SPACE);
1854 }
1855
1856 /*
1857 * At this point,
1858 * "start" and "end" should define the endpoints of the
1859 * available new range, and
1860 * "entry" should refer to the region before the new
1861 * range, and
1862 *
1863 * the map should be locked.
1864 */
1865
1866 /*
1867 * See whether we can avoid creating a new entry (and object) by
1868 * extending one of our neighbors. [So far, we only attempt to
1869 * extend from below.] Note that we can never extend/join
1870 * purgable objects because they need to remain distinct
1871 * entities in order to implement their "volatile object"
1872 * semantics.
1873 */
1874
1875 if (purgable) {
1876 if (object == VM_OBJECT_NULL) {
1877 object = vm_object_allocate(size);
1878 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
1879 object->purgable = VM_PURGABLE_NONVOLATILE;
1880 offset = (vm_object_offset_t)0;
1881 }
1882 } else if ((is_submap == FALSE) &&
1883 (object == VM_OBJECT_NULL) &&
1884 (entry != vm_map_to_entry(map)) &&
1885 (entry->vme_end == start) &&
1886 (!entry->is_shared) &&
1887 (!entry->is_sub_map) &&
1888 ((alias == VM_MEMORY_REALLOC) || (entry->alias == alias)) &&
1889 (entry->inheritance == inheritance) &&
1890 (entry->protection == cur_protection) &&
1891 (entry->max_protection == max_protection) &&
1892 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
1893 (entry->in_transition == 0) &&
1894 (entry->no_cache == no_cache) &&
1895 ((entry->vme_end - entry->vme_start) + size <=
1896 (alias == VM_MEMORY_REALLOC ?
1897 ANON_CHUNK_SIZE :
1898 NO_COALESCE_LIMIT)) &&
1899 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
1900 if (vm_object_coalesce(entry->object.vm_object,
1901 VM_OBJECT_NULL,
1902 entry->offset,
1903 (vm_object_offset_t) 0,
1904 (vm_map_size_t)(entry->vme_end - entry->vme_start),
1905 (vm_map_size_t)(end - entry->vme_end))) {
1906
1907 /*
1908 * Coalesced the two objects - can extend
1909 * the previous map entry to include the
1910 * new range.
1911 */
1912 map->size += (end - entry->vme_end);
1913 assert(entry->vme_start < end);
1914 entry->vme_end = end;
1915 vm_map_store_update_first_free(map, map->first_free);
1916 RETURN(KERN_SUCCESS);
1917 }
1918 }
1919
1920 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
1921 new_entry = NULL;
1922
1923 for (tmp2_start = start; tmp2_start<end; tmp2_start += step) {
1924 tmp2_end = tmp2_start + step;
1925 /*
1926 * Create a new entry
1927 * LP64todo - for now, we can only allocate 4GB internal objects
1928 * because the default pager can't page bigger ones. Remove this
1929 * when it can.
1930 *
1931 * XXX FBDP
1932 * The reserved "page zero" in each process's address space can
1933 * be arbitrarily large. Splitting it into separate 4GB objects and
1934 * therefore different VM map entries serves no purpose and just
1935 * slows down operations on the VM map, so let's not split the
1936 * allocation into 4GB chunks if the max protection is NONE. That
1937 * memory should never be accessible, so it will never get to the
1938 * default pager.
1939 */
1940 tmp_start = tmp2_start;
1941 if (object == VM_OBJECT_NULL &&
1942 size > (vm_map_size_t)ANON_CHUNK_SIZE &&
1943 max_protection != VM_PROT_NONE &&
1944 superpage_size == 0)
1945 tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE;
1946 else
1947 tmp_end = tmp2_end;
1948 do {
1949 new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
1950 object, offset, needs_copy,
1951 FALSE, FALSE,
1952 cur_protection, max_protection,
1953 VM_BEHAVIOR_DEFAULT,
1954 (flags & VM_FLAGS_MAP_JIT)? VM_INHERIT_NONE: inheritance,
1955 0, no_cache,
1956 permanent, superpage_size);
1957 new_entry->alias = alias;
1958 if (flags & VM_FLAGS_MAP_JIT){
1959 if (!(map->jit_entry_exists)){
1960 new_entry->used_for_jit = TRUE;
1961 map->jit_entry_exists = TRUE;
1962 }
1963 }
1964
1965 if (is_submap) {
1966 vm_map_t submap;
1967 boolean_t submap_is_64bit;
1968 boolean_t use_pmap;
1969
1970 new_entry->is_sub_map = TRUE;
1971 submap = (vm_map_t) object;
1972 submap_is_64bit = vm_map_is_64bit(submap);
1973 use_pmap = (alias == VM_MEMORY_SHARED_PMAP);
1974 #ifndef NO_NESTED_PMAP
1975 if (use_pmap && submap->pmap == NULL) {
1976 /* we need a sub pmap to nest... */
1977 submap->pmap = pmap_create(0, submap_is_64bit);
1978 if (submap->pmap == NULL) {
1979 /* let's proceed without nesting... */
1980 }
1981 }
1982 if (use_pmap && submap->pmap != NULL) {
1983 kr = pmap_nest(map->pmap,
1984 submap->pmap,
1985 tmp_start,
1986 tmp_start,
1987 tmp_end - tmp_start);
1988 if (kr != KERN_SUCCESS) {
1989 printf("vm_map_enter: "
1990 "pmap_nest(0x%llx,0x%llx) "
1991 "error 0x%x\n",
1992 (long long)tmp_start,
1993 (long long)tmp_end,
1994 kr);
1995 } else {
1996 /* we're now nested ! */
1997 new_entry->use_pmap = TRUE;
1998 pmap_empty = FALSE;
1999 }
2000 }
2001 #endif /* NO_NESTED_PMAP */
2002 }
2003 entry = new_entry;
2004
2005 if (superpage_size) {
2006 vm_page_t pages, m;
2007 vm_object_t sp_object;
2008
2009 entry->offset = 0;
2010
2011 /* allocate one superpage */
2012 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
2013 if (kr != KERN_SUCCESS) {
2014 new_mapping_established = TRUE; /* will cause deallocation of whole range */
2015 RETURN(kr);
2016 }
2017
2018 /* create one vm_object per superpage */
2019 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
2020 sp_object->phys_contiguous = TRUE;
2021 sp_object->vo_shadow_offset = (vm_object_offset_t)pages->phys_page*PAGE_SIZE;
2022 entry->object.vm_object = sp_object;
2023
2024 /* enter the base pages into the object */
2025 vm_object_lock(sp_object);
2026 for (offset = 0; offset < SUPERPAGE_SIZE; offset += PAGE_SIZE) {
2027 m = pages;
2028 pmap_zero_page(m->phys_page);
2029 pages = NEXT_PAGE(m);
2030 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2031 vm_page_insert(m, sp_object, offset);
2032 }
2033 vm_object_unlock(sp_object);
2034 }
2035 } while (tmp_end != tmp2_end &&
2036 (tmp_start = tmp_end) &&
2037 (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ?
2038 tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end));
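/*
 * The "while" clause above walks through the chunk: "tmp_start" moves up
 * to the previous "tmp_end", and "tmp_end" grows by at most ANON_CHUNK_SIZE
 * until it reaches "tmp2_end"; the outer "for" then moves on to the next
 * superpage-sized (or full-sized) piece of the requested range.
 */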
2039 }
2040
2041 vm_map_unlock(map);
2042 map_locked = FALSE;
2043
2044 new_mapping_established = TRUE;
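/*
 * From this point on, a failure (RETURN with an error) will cause the
 * "BailOut" path below to tear the new mappings back out of the map via
 * "zap_new_map", since they will not be handed to the caller.
 */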
2045
2046 /* Wire down the new entry if the user
2047 * requested all new map entries be wired.
2048 */
2049 if ((map->wiring_required)||(superpage_size)) {
2050 pmap_empty = FALSE; /* pmap won't be empty */
2051 result = vm_map_wire(map, start, end,
2052 new_entry->protection, TRUE);
2053 RETURN(result);
2054 }
2055
2056 if ((object != VM_OBJECT_NULL) &&
2057 (vm_map_pmap_enter_enable) &&
2058 (!anywhere) &&
2059 (!needs_copy) &&
2060 (size < (128*1024))) {
2061 pmap_empty = FALSE; /* pmap won't be empty */
2062
2063 if (override_nx(map, alias) && cur_protection)
2064 cur_protection |= VM_PROT_EXECUTE;
2065
2066 vm_map_pmap_enter(map, start, end,
2067 object, offset, cur_protection);
2068 }
2069
2070 BailOut: ;
2071 if (result == KERN_SUCCESS) {
2072 vm_prot_t pager_prot;
2073 memory_object_t pager;
2074
2075 if (pmap_empty &&
2076 !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
2077 assert(vm_map_pmap_is_empty(map,
2078 *address,
2079 *address+size));
2080 }
2081
2082 /*
2083 * For "named" VM objects, let the pager know that the
2084 * memory object is being mapped. Some pagers need to keep
2085 * track of this, to know when they can reclaim the memory
2086 * object, for example.
2087 * VM calls memory_object_map() for each mapping (specifying
2088 * the protection of each mapping) and calls
2089 * memory_object_last_unmap() when all the mappings are gone.
2090 */
2091 pager_prot = max_protection;
2092 if (needs_copy) {
2093 /*
2094 * Copy-On-Write mapping: won't modify
2095 * the memory object.
2096 */
2097 pager_prot &= ~VM_PROT_WRITE;
2098 }
2099 if (!is_submap &&
2100 object != VM_OBJECT_NULL &&
2101 object->named &&
2102 object->pager != MEMORY_OBJECT_NULL) {
2103 vm_object_lock(object);
2104 pager = object->pager;
2105 if (object->named &&
2106 pager != MEMORY_OBJECT_NULL) {
2107 assert(object->pager_ready);
2108 vm_object_mapping_wait(object, THREAD_UNINT);
2109 vm_object_mapping_begin(object);
2110 vm_object_unlock(object);
2111
2112 kr = memory_object_map(pager, pager_prot);
2113 assert(kr == KERN_SUCCESS);
2114
2115 vm_object_lock(object);
2116 vm_object_mapping_end(object);
2117 }
2118 vm_object_unlock(object);
2119 }
2120 } else {
2121 if (new_mapping_established) {
2122 /*
2123 * We have to get rid of the new mappings since we
2124 * won't make them available to the user.
2125 * Try to do that atomically, to minimize the risk
2126 * that someone else creates new mappings in that range.
2127 */
2128 zap_new_map = vm_map_create(PMAP_NULL,
2129 *address,
2130 *address + size,
2131 map->hdr.entries_pageable);
2132 if (!map_locked) {
2133 vm_map_lock(map);
2134 map_locked = TRUE;
2135 }
2136 (void) vm_map_delete(map, *address, *address+size,
2137 VM_MAP_REMOVE_SAVE_ENTRIES,
2138 zap_new_map);
2139 }
2140 if (zap_old_map != VM_MAP_NULL &&
2141 zap_old_map->hdr.nentries != 0) {
2142 vm_map_entry_t entry1, entry2;
2143
2144 /*
2145 * The new mapping failed. Attempt to restore
2146 * the old mappings, saved in the "zap_old_map".
2147 */
2148 if (!map_locked) {
2149 vm_map_lock(map);
2150 map_locked = TRUE;
2151 }
2152
2153 /* first check if the coast is still clear */
2154 start = vm_map_first_entry(zap_old_map)->vme_start;
2155 end = vm_map_last_entry(zap_old_map)->vme_end;
2156 if (vm_map_lookup_entry(map, start, &entry1) ||
2157 vm_map_lookup_entry(map, end, &entry2) ||
2158 entry1 != entry2) {
2159 /*
2160 * Part of that range has already been
2161 * re-mapped: we can't restore the old
2162 * mappings...
2163 */
2164 vm_map_enter_restore_failures++;
2165 } else {
2166 /*
2167 * Transfer the saved map entries from
2168 * "zap_old_map" to the original "map",
2169 * inserting them all after "entry1".
2170 */
2171 for (entry2 = vm_map_first_entry(zap_old_map);
2172 entry2 != vm_map_to_entry(zap_old_map);
2173 entry2 = vm_map_first_entry(zap_old_map)) {
2174 vm_map_size_t entry_size;
2175
2176 entry_size = (entry2->vme_end -
2177 entry2->vme_start);
2178 vm_map_store_entry_unlink(zap_old_map,
2179 entry2);
2180 zap_old_map->size -= entry_size;
2181 vm_map_store_entry_link(map, entry1, entry2);
2182 map->size += entry_size;
2183 entry1 = entry2;
2184 }
2185 if (map->wiring_required) {
2186 /*
2187 * XXX TODO: we should rewire the
2188 * old pages here...
2189 */
2190 }
2191 vm_map_enter_restore_successes++;
2192 }
2193 }
2194 }
2195
2196 if (map_locked) {
2197 vm_map_unlock(map);
2198 }
2199
2200 /*
2201 * Get rid of the "zap_maps" and all the map entries that
2202 * they may still contain.
2203 */
2204 if (zap_old_map != VM_MAP_NULL) {
2205 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2206 zap_old_map = VM_MAP_NULL;
2207 }
2208 if (zap_new_map != VM_MAP_NULL) {
2209 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2210 zap_new_map = VM_MAP_NULL;
2211 }
2212
2213 return result;
2214
2215 #undef RETURN
2216 }
2217
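/*
 * vm_map_enter_mem_object:
 *
 * Map the VM object, submap or memory object named by "port" into
 * "target_map" at "*address" (or at a map-chosen address if
 * VM_FLAGS_ANYWHERE is set), with the given protections and inheritance.
 * If "copy" is TRUE, a copy of the object's contents is mapped instead of
 * the object itself.  On success, "*address" is updated with the actual
 * mapping address.
 */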
2218 kern_return_t
2219 vm_map_enter_mem_object(
2220 vm_map_t target_map,
2221 vm_map_offset_t *address,
2222 vm_map_size_t initial_size,
2223 vm_map_offset_t mask,
2224 int flags,
2225 ipc_port_t port,
2226 vm_object_offset_t offset,
2227 boolean_t copy,
2228 vm_prot_t cur_protection,
2229 vm_prot_t max_protection,
2230 vm_inherit_t inheritance)
2231 {
2232 vm_map_address_t map_addr;
2233 vm_map_size_t map_size;
2234 vm_object_t object;
2235 vm_object_size_t size;
2236 kern_return_t result;
2237 boolean_t mask_cur_protection, mask_max_protection;
2238
2239 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
2240 mask_max_protection = max_protection & VM_PROT_IS_MASK;
2241 cur_protection &= ~VM_PROT_IS_MASK;
2242 max_protection &= ~VM_PROT_IS_MASK;
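/*
 * VM_PROT_IS_MASK in either protection argument means "intersect this
 * protection with the named entry's own protection" rather than failing
 * if the named entry doesn't grant everything requested; the flag itself
 * is stripped before the protections are used.
 */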
2243
2244 /*
2245 * Check arguments for validity
2246 */
2247 if ((target_map == VM_MAP_NULL) ||
2248 (cur_protection & ~VM_PROT_ALL) ||
2249 (max_protection & ~VM_PROT_ALL) ||
2250 (inheritance > VM_INHERIT_LAST_VALID) ||
2251 initial_size == 0)
2252 return KERN_INVALID_ARGUMENT;
2253
2254 map_addr = vm_map_trunc_page(*address);
2255 map_size = vm_map_round_page(initial_size);
2256 size = vm_object_round_page(initial_size);
2257
2258 /*
2259 * Find the vm object (if any) corresponding to this port.
2260 */
2261 if (!IP_VALID(port)) {
2262 object = VM_OBJECT_NULL;
2263 offset = 0;
2264 copy = FALSE;
2265 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
2266 vm_named_entry_t named_entry;
2267
2268 named_entry = (vm_named_entry_t) port->ip_kobject;
2269 /* a few checks to make sure user is obeying rules */
2270 if (size == 0) {
2271 if (offset >= named_entry->size)
2272 return KERN_INVALID_RIGHT;
2273 size = named_entry->size - offset;
2274 }
2275 if (mask_max_protection) {
2276 max_protection &= named_entry->protection;
2277 }
2278 if (mask_cur_protection) {
2279 cur_protection &= named_entry->protection;
2280 }
2281 if ((named_entry->protection & max_protection) !=
2282 max_protection)
2283 return KERN_INVALID_RIGHT;
2284 if ((named_entry->protection & cur_protection) !=
2285 cur_protection)
2286 return KERN_INVALID_RIGHT;
2287 if (named_entry->size < (offset + size))
2288 return KERN_INVALID_ARGUMENT;
2289
2290 /* the caller's parameter "offset" is the offset from the */
2291 /* beginning of the named entry; convert it to an offset in the object */
2292 offset = offset + named_entry->offset;
2293
2294 named_entry_lock(named_entry);
2295 if (named_entry->is_sub_map) {
2296 vm_map_t submap;
2297
2298 submap = named_entry->backing.map;
2299 vm_map_lock(submap);
2300 vm_map_reference(submap);
2301 vm_map_unlock(submap);
2302 named_entry_unlock(named_entry);
2303
2304 result = vm_map_enter(target_map,
2305 &map_addr,
2306 map_size,
2307 mask,
2308 flags | VM_FLAGS_SUBMAP,
2309 (vm_object_t) submap,
2310 offset,
2311 copy,
2312 cur_protection,
2313 max_protection,
2314 inheritance);
2315 if (result != KERN_SUCCESS) {
2316 vm_map_deallocate(submap);
2317 } else {
2318 /*
2319 * No need to lock "submap" just to check its
2320 * "mapped" flag: that flag is never reset
2321 * once it's been set and if we race, we'll
2322 * just end up setting it twice, which is OK.
2323 */
2324 if (submap->mapped == FALSE) {
2325 /*
2326 * This submap has never been mapped.
2327 * Set its "mapped" flag now that it
2328 * has been mapped.
2329 * This happens only for the first ever
2330 * mapping of a "submap".
2331 */
2332 vm_map_lock(submap);
2333 submap->mapped = TRUE;
2334 vm_map_unlock(submap);
2335 }
2336 *address = map_addr;
2337 }
2338 return result;
2339
2340 } else if (named_entry->is_pager) {
2341 unsigned int access;
2342 vm_prot_t protections;
2343 unsigned int wimg_mode;
2344
2345 protections = named_entry->protection & VM_PROT_ALL;
2346 access = GET_MAP_MEM(named_entry->protection);
2347
2348 object = vm_object_enter(named_entry->backing.pager,
2349 named_entry->size,
2350 named_entry->internal,
2351 FALSE,
2352 FALSE);
2353 if (object == VM_OBJECT_NULL) {
2354 named_entry_unlock(named_entry);
2355 return KERN_INVALID_OBJECT;
2356 }
2357
2358 /* JMM - drop reference on pager here */
2359
2360 /* create an extra ref for the named entry */
2361 vm_object_lock(object);
2362 vm_object_reference_locked(object);
2363 named_entry->backing.object = object;
2364 named_entry->is_pager = FALSE;
2365 named_entry_unlock(named_entry);
2366
2367 wimg_mode = object->wimg_bits;
2368
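/*
 * The MAP_MEM_* access attribute carried in the named entry selects the
 * caching mode (WIMG bits) the object's pages should be mapped with; the
 * chosen mode is applied further below, once any external pager is ready.
 */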
2369 if (access == MAP_MEM_IO) {
2370 wimg_mode = VM_WIMG_IO;
2371 } else if (access == MAP_MEM_COPYBACK) {
2372 wimg_mode = VM_WIMG_USE_DEFAULT;
2373 } else if (access == MAP_MEM_WTHRU) {
2374 wimg_mode = VM_WIMG_WTHRU;
2375 } else if (access == MAP_MEM_WCOMB) {
2376 wimg_mode = VM_WIMG_WCOMB;
2377 }
2378
2379 /* wait for object (if any) to be ready */
2380 if (!named_entry->internal) {
2381 while (!object->pager_ready) {
2382 vm_object_wait(
2383 object,
2384 VM_OBJECT_EVENT_PAGER_READY,
2385 THREAD_UNINT);
2386 vm_object_lock(object);
2387 }
2388 }
2389
2390 if (object->wimg_bits != wimg_mode)
2391 vm_object_change_wimg_mode(object, wimg_mode);
2392
2393 object->true_share = TRUE;
2394
2395 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
2396 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
2397 vm_object_unlock(object);
2398 } else {
2399 /* This is the case where we are going to map */
2400 /* an already mapped object. If the object is */
2401 /* not ready, it is internal. An external */
2402 /* object cannot be mapped until it is ready, */
2403 /* so we can avoid the ready check */
2404 /* in this case. */
2405 object = named_entry->backing.object;
2406 assert(object != VM_OBJECT_NULL);
2407 named_entry_unlock(named_entry);
2408 vm_object_reference(object);
2409 }
2410 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
2411 /*
2412 * JMM - This is temporary until we unify named entries
2413 * and raw memory objects.
2414 *
2415 * Detected fake ip_kotype for a memory object. In
2416 * this case, the port isn't really a port at all, but
2417 * instead is just a raw memory object.
2418 */
2419
2420 object = vm_object_enter((memory_object_t)port,
2421 size, FALSE, FALSE, FALSE);
2422 if (object == VM_OBJECT_NULL)
2423 return KERN_INVALID_OBJECT;
2424
2425 /* wait for object (if any) to be ready */
2426 if (object != VM_OBJECT_NULL) {
2427 if (object == kernel_object) {
2428 printf("Warning: Attempt to map kernel object"
2429 " by a non-private kernel entity\n");
2430 return KERN_INVALID_OBJECT;
2431 }
2432 if (!object->pager_ready) {
2433 vm_object_lock(object);
2434
2435 while (!object->pager_ready) {
2436 vm_object_wait(object,
2437 VM_OBJECT_EVENT_PAGER_READY,
2438 THREAD_UNINT);
2439 vm_object_lock(object);
2440 }
2441 vm_object_unlock(object);
2442 }
2443 }
2444 } else {
2445 return KERN_INVALID_OBJECT;
2446 }
2447
2448 if (object != VM_OBJECT_NULL &&
2449 object->named &&
2450 object->pager != MEMORY_OBJECT_NULL &&
2451 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2452 memory_object_t pager;
2453 vm_prot_t pager_prot;
2454 kern_return_t kr;
2455
2456 /*
2457 * For "named" VM objects, let the pager know that the
2458 * memory object is being mapped. Some pagers need to keep
2459 * track of this, to know when they can reclaim the memory
2460 * object, for example.
2461 * VM calls memory_object_map() for each mapping (specifying
2462 * the protection of each mapping) and calls
2463 * memory_object_last_unmap() when all the mappings are gone.
2464 */
2465 pager_prot = max_protection;
2466 if (copy) {
2467 /*
2468 * Copy-On-Write mapping: won't modify the
2469 * memory object.
2470 */
2471 pager_prot &= ~VM_PROT_WRITE;
2472 }
2473 vm_object_lock(object);
2474 pager = object->pager;
2475 if (object->named &&
2476 pager != MEMORY_OBJECT_NULL &&
2477 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2478 assert(object->pager_ready);
2479 vm_object_mapping_wait(object, THREAD_UNINT);
2480 vm_object_mapping_begin(object);
2481 vm_object_unlock(object);
2482
2483 kr = memory_object_map(pager, pager_prot);
2484 assert(kr == KERN_SUCCESS);
2485
2486 vm_object_lock(object);
2487 vm_object_mapping_end(object);
2488 }
2489 vm_object_unlock(object);
2490 }
2491
2492 /*
2493 * Perform the copy if requested
2494 */
2495
2496 if (copy) {
2497 vm_object_t new_object;
2498 vm_object_offset_t new_offset;
2499
2500 result = vm_object_copy_strategically(object, offset, size,
2501 &new_object, &new_offset,
2502 &copy);
2503
2504
2505 if (result == KERN_MEMORY_RESTART_COPY) {
2506 boolean_t success;
2507 boolean_t src_needs_copy;
2508
2509 /*
2510 * XXX
2511 * We currently ignore src_needs_copy.
2512 * This really is the issue of how to make
2513 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
2514 * non-kernel users to use. Solution forthcoming.
2515 * In the meantime, since we don't allow non-kernel
2516 * memory managers to specify symmetric copy,
2517 * we won't run into problems here.
2518 */
2519 new_object = object;
2520 new_offset = offset;
2521 success = vm_object_copy_quickly(&new_object,
2522 new_offset, size,
2523 &src_needs_copy,
2524 &copy);
2525 assert(success);
2526 result = KERN_SUCCESS;
2527 }
2528 /*
2529 * Throw away the reference to the
2530 * original object, as it won't be mapped.
2531 */
2532
2533 vm_object_deallocate(object);
2534
2535 if (result != KERN_SUCCESS)
2536 return result;
2537
2538 object = new_object;
2539 offset = new_offset;
2540 }
2541
2542 result = vm_map_enter(target_map,
2543 &map_addr, map_size,
2544 (vm_map_offset_t)mask,
2545 flags,
2546 object, offset,
2547 copy,
2548 cur_protection, max_protection, inheritance);
2549 if (result != KERN_SUCCESS)
2550 vm_object_deallocate(object);
2551 *address = map_addr;
2552 return result;
2553 }
2554
2555
2556
2557
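/*
 * vm_map_enter_mem_object_control:
 *
 * Same idea as vm_map_enter_mem_object(), but the memory is named by a
 * memory_object_control_t rather than a port: the VM object behind
 * "control" is mapped (or copied and then mapped) into "target_map".
 */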
2558 kern_return_t
2559 vm_map_enter_mem_object_control(
2560 vm_map_t target_map,
2561 vm_map_offset_t *address,
2562 vm_map_size_t initial_size,
2563 vm_map_offset_t mask,
2564 int flags,
2565 memory_object_control_t control,
2566 vm_object_offset_t offset,
2567 boolean_t copy,
2568 vm_prot_t cur_protection,
2569 vm_prot_t max_protection,
2570 vm_inherit_t inheritance)
2571 {
2572 vm_map_address_t map_addr;
2573 vm_map_size_t map_size;
2574 vm_object_t object;
2575 vm_object_size_t size;
2576 kern_return_t result;
2577 memory_object_t pager;
2578 vm_prot_t pager_prot;
2579 kern_return_t kr;
2580
2581 /*
2582 * Check arguments for validity
2583 */
2584 if ((target_map == VM_MAP_NULL) ||
2585 (cur_protection & ~VM_PROT_ALL) ||
2586 (max_protection & ~VM_PROT_ALL) ||
2587 (inheritance > VM_INHERIT_LAST_VALID) ||
2588 initial_size == 0)
2589 return KERN_INVALID_ARGUMENT;
2590
2591 map_addr = vm_map_trunc_page(*address);
2592 map_size = vm_map_round_page(initial_size);
2593 size = vm_object_round_page(initial_size);
2594
2595 object = memory_object_control_to_vm_object(control);
2596
2597 if (object == VM_OBJECT_NULL)
2598 return KERN_INVALID_OBJECT;
2599
2600 if (object == kernel_object) {
2601 printf("Warning: Attempt to map kernel object"
2602 " by a non-private kernel entity\n");
2603 return KERN_INVALID_OBJECT;
2604 }
2605
2606 vm_object_lock(object);
2607 object->ref_count++;
2608 vm_object_res_reference(object);
2609
2610 /*
2611 * For "named" VM objects, let the pager know that the
2612 * memory object is being mapped. Some pagers need to keep
2613 * track of this, to know when they can reclaim the memory
2614 * object, for example.
2615 * VM calls memory_object_map() for each mapping (specifying
2616 * the protection of each mapping) and calls
2617 * memory_object_last_unmap() when all the mappings are gone.
2618 */
2619 pager_prot = max_protection;
2620 if (copy) {
2621 pager_prot &= ~VM_PROT_WRITE;
2622 }
2623 pager = object->pager;
2624 if (object->named &&
2625 pager != MEMORY_OBJECT_NULL &&
2626 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2627 assert(object->pager_ready);
2628 vm_object_mapping_wait(object, THREAD_UNINT);
2629 vm_object_mapping_begin(object);
2630 vm_object_unlock(object);
2631
2632 kr = memory_object_map(pager, pager_prot);
2633 assert(kr == KERN_SUCCESS);
2634
2635 vm_object_lock(object);
2636 vm_object_mapping_end(object);
2637 }
2638 vm_object_unlock(object);
2639
2640 /*
2641 * Perform the copy if requested
2642 */
2643
2644 if (copy) {
2645 vm_object_t new_object;
2646 vm_object_offset_t new_offset;
2647
2648 result = vm_object_copy_strategically(object, offset, size,
2649 &new_object, &new_offset,
2650 &copy);
2651
2652
2653 if (result == KERN_MEMORY_RESTART_COPY) {
2654 boolean_t success;
2655 boolean_t src_needs_copy;
2656
2657 /*
2658 * XXX
2659 * We currently ignore src_needs_copy.
2660 * This really is the issue of how to make
2661 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
2662 * non-kernel users to use. Solution forthcoming.
2663 * In the meantime, since we don't allow non-kernel
2664 * memory managers to specify symmetric copy,
2665 * we won't run into problems here.
2666 */
2667 new_object = object;
2668 new_offset = offset;
2669 success = vm_object_copy_quickly(&new_object,
2670 new_offset, size,
2671 &src_needs_copy,
2672 &copy);
2673 assert(success);
2674 result = KERN_SUCCESS;
2675 }
2676 /*
2677 * Throw away the reference to the
2678 * original object, as it won't be mapped.
2679 */
2680
2681 vm_object_deallocate(object);
2682
2683 if (result != KERN_SUCCESS)
2684 return result;
2685
2686 object = new_object;
2687 offset = new_offset;
2688 }
2689
2690 result = vm_map_enter(target_map,
2691 &map_addr, map_size,
2692 (vm_map_offset_t)mask,
2693 flags,
2694 object, offset,
2695 copy,
2696 cur_protection, max_protection, inheritance);
2697 if (result != KERN_SUCCESS)
2698 vm_object_deallocate(object);
2699 *address = map_addr;
2700
2701 return result;
2702 }
2703
2704
2705 #if VM_CPM
2706
2707 #ifdef MACH_ASSERT
2708 extern pmap_paddr_t avail_start, avail_end;
2709 #endif
2710
2711 /*
2712 * Allocate memory in the specified map, with the caveat that
2713 * the memory is physically contiguous. This call may fail
2714 * if the system can't find sufficient contiguous memory.
2715 * This call may cause or lead to heart-stopping amounts of
2716 * paging activity.
2717 *
2718 * Memory obtained from this call should be freed in the
2719 * normal way, viz., via vm_deallocate.
2720 */
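/*
 * Illustrative sketch only (not from the original source), assuming a
 * kernel client operating on "kernel_map":
 *
 *	vm_map_offset_t	addr = 0;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_enter_cpm(kernel_map, &addr,
 *			      (vm_map_size_t)(16 * PAGE_SIZE),
 *			      VM_FLAGS_ANYWHERE);
 *	if (kr == KERN_SUCCESS) {
 *		... use the physically contiguous range ...
 *		(void) vm_deallocate(kernel_map, addr, 16 * PAGE_SIZE);
 *	}
 */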
2721 kern_return_t
2722 vm_map_enter_cpm(
2723 vm_map_t map,
2724 vm_map_offset_t *addr,
2725 vm_map_size_t size,
2726 int flags)
2727 {
2728 vm_object_t cpm_obj;
2729 pmap_t pmap;
2730 vm_page_t m, pages;
2731 kern_return_t kr;
2732 vm_map_offset_t va, start, end, offset;
2733 #if MACH_ASSERT
2734 vm_map_offset_t prev_addr;
2735 #endif /* MACH_ASSERT */
2736
2737 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
2738
2739 if (!vm_allocate_cpm_enabled)
2740 return KERN_FAILURE;
2741
2742 if (size == 0) {
2743 *addr = 0;
2744 return KERN_SUCCESS;
2745 }
2746 if (anywhere)
2747 *addr = vm_map_min(map);
2748 else
2749 *addr = vm_map_trunc_page(*addr);
2750 size = vm_map_round_page(size);
2751
2752 /*
2753 * LP64todo - cpm_allocate should probably allow
2754 * allocations of >4GB, but not with the current
2755 * algorithm, so just cast down the size for now.
2756 */
2757 if (size > VM_MAX_ADDRESS)
2758 return KERN_RESOURCE_SHORTAGE;
2759 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
2760 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)
2761 return kr;
2762
2763 cpm_obj = vm_object_allocate((vm_object_size_t)size);
2764 assert(cpm_obj != VM_OBJECT_NULL);
2765 assert(cpm_obj->internal);
2766 assert(cpm_obj->size == (vm_object_size_t)size);
2767 assert(cpm_obj->can_persist == FALSE);
2768 assert(cpm_obj->pager_created == FALSE);
2769 assert(cpm_obj->pageout == FALSE);
2770 assert(cpm_obj->shadow == VM_OBJECT_NULL);
2771
2772 /*
2773 * Insert pages into object.
2774 */
2775
2776 vm_object_lock(cpm_obj);
2777 for (offset = 0; offset < size; offset += PAGE_SIZE) {
2778 m = pages;
2779 pages = NEXT_PAGE(m);
2780 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2781
2782 assert(!m->gobbled);
2783 assert(!m->wanted);
2784 assert(!m->pageout);
2785 assert(!m->tabled);
2786 assert(VM_PAGE_WIRED(m));
2787 /*
2788 * ENCRYPTED SWAP:
2789 * "m" is not supposed to be pageable, so it
2790 * should not be encrypted. It wouldn't be safe
2791 * to enter it in a new VM object while encrypted.
2792 */
2793 ASSERT_PAGE_DECRYPTED(m);
2794 assert(m->busy);
2795 assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT));
2796
2797 m->busy = FALSE;
2798 vm_page_insert(m, cpm_obj, offset);
2799 }
2800 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
2801 vm_object_unlock(cpm_obj);
2802
2803 /*
2804 * Hang onto a reference on the object in case a
2805 * multi-threaded application for some reason decides
2806 * to deallocate the portion of the address space into
2807 * which we will insert this object.
2808 *
2809 * Unfortunately, we must insert the object now before
2810 * we can talk to the pmap module about which addresses
2811 * must be wired down. Hence, the race with a multi-
2812 * threaded app.
2813 */
2814 vm_object_reference(cpm_obj);
2815
2816 /*
2817 * Insert object into map.
2818 */
2819
2820 kr = vm_map_enter(
2821 map,
2822 addr,
2823 size,
2824 (vm_map_offset_t)0,
2825 flags,
2826 cpm_obj,
2827 (vm_object_offset_t)0,
2828 FALSE,
2829 VM_PROT_ALL,
2830 VM_PROT_ALL,
2831 VM_INHERIT_DEFAULT);
2832
2833 if (kr != KERN_SUCCESS) {
2834 /*
2835 * A CPM object doesn't have can_persist set,
2836 * so all we have to do is deallocate it to
2837 * free up these pages.
2838 */
2839 assert(cpm_obj->pager_created == FALSE);
2840 assert(cpm_obj->can_persist == FALSE);
2841 assert(cpm_obj->pageout == FALSE);
2842 assert(cpm_obj->shadow == VM_OBJECT_NULL);
2843 vm_object_deallocate(cpm_obj); /* kill acquired ref */
2844 vm_object_deallocate(cpm_obj); /* kill creation ref */
2845 }
2846
2847 /*
2848 * Inform the physical mapping system that the
2849 * range of addresses may not fault, so that
2850 * page tables and such can be locked down as well.
2851 */
2852 start = *addr;
2853 end = start + size;
2854 pmap = vm_map_pmap(map);
2855 pmap_pageable(pmap, start, end, FALSE);
2856
2857 /*
2858 * Enter each page into the pmap, to avoid faults.
2859 * Note that this loop could be coded more efficiently,
2860 * if the need arose, rather than looking up each page
2861 * again.
2862 */
2863 for (offset = 0, va = start; offset < size;
2864 va += PAGE_SIZE, offset += PAGE_SIZE) {
2865 int type_of_fault;
2866
2867 vm_object_lock(cpm_obj);
2868 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2869 assert(m != VM_PAGE_NULL);
2870
2871 vm_page_zero_fill(m);
2872
2873 type_of_fault = DBG_ZERO_FILL_FAULT;
2874
2875 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
2876 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
2877 &type_of_fault);
2878
2879 vm_object_unlock(cpm_obj);
2880 }
2881
2882 #if MACH_ASSERT
2883 /*
2884 * Verify ordering in address space.
2885 */
2886 for (offset = 0; offset < size; offset += PAGE_SIZE) {
2887 vm_object_lock(cpm_obj);
2888 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2889 vm_object_unlock(cpm_obj);
2890 if (m == VM_PAGE_NULL)
2891 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
2892 cpm_obj, (uint64_t)offset);
2893 assert(m->tabled);
2894 assert(!m->busy);
2895 assert(!m->wanted);
2896 assert(!m->fictitious);
2897 assert(!m->private);
2898 assert(!m->absent);
2899 assert(!m->error);
2900 assert(!m->cleaning);
2901 assert(!m->precious);
2902 assert(!m->clustered);
2903 if (offset != 0) {
2904 if (m->phys_page != prev_addr + 1) {
2905 printf("start 0x%llx end 0x%llx va 0x%llx\n",
2906 (uint64_t)start, (uint64_t)end, (uint64_t)va);
2907 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
2908 printf("m %p prev_address 0x%llx\n", m,
2909 (uint64_t)prev_addr);
2910 panic("vm_allocate_cpm: pages not contig!");
2911 }
2912 }
2913 prev_addr = m->phys_page;
2914 }
2915 #endif /* MACH_ASSERT */
2916
2917 vm_object_deallocate(cpm_obj); /* kill extra ref */
2918
2919 return kr;
2920 }
2921
2922
2923 #else /* VM_CPM */
2924
2925 /*
2926 * Interface is defined in all cases, but unless the kernel
2927 * is built explicitly for this option, the interface does
2928 * nothing.
2929 */
2930
2931 kern_return_t
2932 vm_map_enter_cpm(
2933 __unused vm_map_t map,
2934 __unused vm_map_offset_t *addr,
2935 __unused vm_map_size_t size,
2936 __unused int flags)
2937 {
2938 return KERN_FAILURE;
2939 }
2940 #endif /* VM_CPM */
2941
2942 /* Not used without nested pmaps */
2943 #ifndef NO_NESTED_PMAP
2944 /*
2945 * Clip and unnest a portion of a nested submap mapping.
2946 */
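/*
 * On return, "entry" has been clipped so that it covers no more than
 * [start_unnest, end_unnest), the hardware nesting for that range has been
 * torn down with pmap_unnest(), and the entry's "use_pmap" flag is cleared
 * so the range is served by the parent map's pmap from then on.
 */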
2947
2948
2949 static void
2950 vm_map_clip_unnest(
2951 vm_map_t map,
2952 vm_map_entry_t entry,
2953 vm_map_offset_t start_unnest,
2954 vm_map_offset_t end_unnest)
2955 {
2956 vm_map_offset_t old_start_unnest = start_unnest;
2957 vm_map_offset_t old_end_unnest = end_unnest;
2958
2959 assert(entry->is_sub_map);
2960 assert(entry->object.sub_map != NULL);
2961
2962 /*
2963 * Query the platform for the optimal unnest range.
2964 * DRK: There's some duplication of effort here, since
2965 * callers may have adjusted the range to some extent. This
2966 * routine was introduced to support 1GiB subtree nesting
2967 * for x86 platforms, which can also nest on 2MiB boundaries
2968 * depending on size/alignment.
2969 */
2970 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
2971 log_unnest_badness(map, old_start_unnest, old_end_unnest);
2972 }
2973
2974 if (entry->vme_start > start_unnest ||
2975 entry->vme_end < end_unnest) {
2976 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
2977 "bad nested entry: start=0x%llx end=0x%llx\n",
2978 (long long)start_unnest, (long long)end_unnest,
2979 (long long)entry->vme_start, (long long)entry->vme_end);
2980 }
2981
2982 if (start_unnest > entry->vme_start) {
2983 _vm_map_clip_start(&map->hdr,
2984 entry,
2985 start_unnest);
2986 vm_map_store_update_first_free(map, map->first_free);
2987 }
2988 if (entry->vme_end > end_unnest) {
2989 _vm_map_clip_end(&map->hdr,
2990 entry,
2991 end_unnest);
2992 vm_map_store_update_first_free(map, map->first_free);
2993 }
2994
2995 pmap_unnest(map->pmap,
2996 entry->vme_start,
2997 entry->vme_end - entry->vme_start);
2998 if ((map->mapped) && (map->ref_count)) {
2999 /* clean up parent map/maps */
3000 vm_map_submap_pmap_clean(
3001 map, entry->vme_start,
3002 entry->vme_end,
3003 entry->object.sub_map,
3004 entry->offset);
3005 }
3006 entry->use_pmap = FALSE;
3007 }
3008 #endif /* NO_NESTED_PMAP */
3009
3010 /*
3011 * vm_map_clip_start: [ internal use only ]
3012 *
3013 * Asserts that the given entry begins at or after
3014 * the specified address; if necessary,
3015 * it splits the entry into two.
3016 */
3017 void
3018 vm_map_clip_start(
3019 vm_map_t map,
3020 vm_map_entry_t entry,
3021 vm_map_offset_t startaddr)
3022 {
3023 #ifndef NO_NESTED_PMAP
3024 if (entry->use_pmap &&
3025 startaddr >= entry->vme_start) {
3026 vm_map_offset_t start_unnest, end_unnest;
3027
3028 /*
3029 * Make sure "startaddr" is no longer in a nested range
3030 * before we clip. Unnest only the minimum range the platform
3031 * can handle.
3032 * vm_map_clip_unnest may perform additional adjustments to
3033 * the unnest range.
3034 */
3035 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
3036 end_unnest = start_unnest + pmap_nesting_size_min;
3037 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
3038 }
3039 #endif /* NO_NESTED_PMAP */
3040 if (startaddr > entry->vme_start) {
3041 if (entry->object.vm_object &&
3042 !entry->is_sub_map &&
3043 entry->object.vm_object->phys_contiguous) {
3044 pmap_remove(map->pmap,
3045 (addr64_t)(entry->vme_start),
3046 (addr64_t)(entry->vme_end));
3047 }
3048 _vm_map_clip_start(&map->hdr, entry, startaddr);
3049 vm_map_store_update_first_free(map, map->first_free);
3050 }
3051 }
3052
3053
3054 #define vm_map_copy_clip_start(copy, entry, startaddr) \
3055 MACRO_BEGIN \
3056 if ((startaddr) > (entry)->vme_start) \
3057 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
3058 MACRO_END
3059
3060 /*
3061 * This routine is called only when it is known that
3062 * the entry must be split.
3063 */
3064 static void
3065 _vm_map_clip_start(
3066 register struct vm_map_header *map_header,
3067 register vm_map_entry_t entry,
3068 register vm_map_offset_t start)
3069 {
3070 register vm_map_entry_t new_entry;
3071
3072 /*
3073 * Split off the front portion --
3074 * note that we must insert the new
3075 * entry BEFORE this one, so that
3076 * this entry has the specified starting
3077 * address.
3078 */
3079
3080 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
3081 vm_map_entry_copy_full(new_entry, entry);
3082
3083 new_entry->vme_end = start;
3084 assert(new_entry->vme_start < new_entry->vme_end);
3085 entry->offset += (start - entry->vme_start);
3086 assert(start < entry->vme_end);
3087 entry->vme_start = start;
3088
3089 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
3090
3091 if (entry->is_sub_map)
3092 vm_map_reference(new_entry->object.sub_map);
3093 else
3094 vm_object_reference(new_entry->object.vm_object);
3095 }
3096
3097
3098 /*
3099 * vm_map_clip_end: [ internal use only ]
3100 *
3101 * Asserts that the given entry ends at or before
3102 * the specified address; if necessary,
3103 * it splits the entry into two.
3104 */
3105 void
3106 vm_map_clip_end(
3107 vm_map_t map,
3108 vm_map_entry_t entry,
3109 vm_map_offset_t endaddr)
3110 {
3111 if (endaddr > entry->vme_end) {
3112 /*
3113 * Within the scope of this clipping, limit "endaddr" to
3114 * the end of this map entry...
3115 */
3116 endaddr = entry->vme_end;
3117 }
3118 #ifndef NO_NESTED_PMAP
3119 if (entry->use_pmap) {
3120 vm_map_offset_t start_unnest, end_unnest;
3121
3122 /*
3123 * Make sure the range between the start of this entry and
3124 * the new "endaddr" is no longer nested before we clip.
3125 * Unnest only the minimum range the platform can handle.
3126 * vm_map_clip_unnest may perform additional adjustments to
3127 * the unnest range.
3128 */
3129 start_unnest = entry->vme_start;
3130 end_unnest =
3131 (endaddr + pmap_nesting_size_min - 1) &
3132 ~(pmap_nesting_size_min - 1);
3133 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
3134 }
3135 #endif /* NO_NESTED_PMAP */
3136 if (endaddr < entry->vme_end) {
3137 if (entry->object.vm_object &&
3138 !entry->is_sub_map &&
3139 entry->object.vm_object->phys_contiguous) {
3140 pmap_remove(map->pmap,
3141 (addr64_t)(entry->vme_start),
3142 (addr64_t)(entry->vme_end));
3143 }
3144 _vm_map_clip_end(&map->hdr, entry, endaddr);
3145 vm_map_store_update_first_free(map, map->first_free);
3146 }
3147 }
3148
3149
3150 #define vm_map_copy_clip_end(copy, entry, endaddr) \
3151 MACRO_BEGIN \
3152 if ((endaddr) < (entry)->vme_end) \
3153 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
3154 MACRO_END
3155
3156 /*
3157 * This routine is called only when it is known that
3158 * the entry must be split.
3159 */
3160 static void
3161 _vm_map_clip_end(
3162 register struct vm_map_header *map_header,
3163 register vm_map_entry_t entry,
3164 register vm_map_offset_t end)
3165 {
3166 register vm_map_entry_t new_entry;
3167
3168 /*
3169 * Create a new entry and insert it
3170 * AFTER the specified entry
3171 */
3172
3173 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
3174 vm_map_entry_copy_full(new_entry, entry);
3175
3176 assert(entry->vme_start < end);
3177 new_entry->vme_start = entry->vme_end = end;
3178 new_entry->offset += (end - entry->vme_start);
3179 assert(new_entry->vme_start < new_entry->vme_end);
3180
3181 _vm_map_store_entry_link(map_header, entry, new_entry);
3182
3183 if (entry->is_sub_map)
3184 vm_map_reference(new_entry->object.sub_map);
3185 else
3186 vm_object_reference(new_entry->object.vm_object);
3187 }
3188
3189
3190 /*
3191 * VM_MAP_RANGE_CHECK: [ internal use only ]
3192 *
3193 * Asserts that the starting and ending region
3194 * addresses fall within the valid range of the map.
3195 */
3196 #define VM_MAP_RANGE_CHECK(map, start, end) \
3197 MACRO_BEGIN \
3198 if (start < vm_map_min(map)) \
3199 start = vm_map_min(map); \
3200 if (end > vm_map_max(map)) \
3201 end = vm_map_max(map); \
3202 if (start > end) \
3203 start = end; \
3204 MACRO_END
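/*
 * Note that VM_MAP_RANGE_CHECK clamps rather than fails: an out-of-range
 * request is silently truncated to the map's limits, and an inverted range
 * collapses to an empty one (start == end).
 */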
3205
3206 /*
3207 * vm_map_range_check: [ internal use only ]
3208 *
3209 * Check that the region defined by the specified start and
3210 * end addresses is wholly contained within a single map
3211 * entry or set of adjacent map entries of the specified map,
3212 * i.e. the specified region contains no unmapped space.
3213 * If any or all of the region is unmapped, FALSE is returned.
3214 * Otherwise, TRUE is returned and if the output argument 'entry'
3215 * is not NULL it points to the map entry containing the start
3216 * of the region.
3217 *
3218 * The map is locked for reading on entry and is left locked.
3219 */
3220 static boolean_t
3221 vm_map_range_check(
3222 register vm_map_t map,
3223 register vm_map_offset_t start,
3224 register vm_map_offset_t end,
3225 vm_map_entry_t *entry)
3226 {
3227 vm_map_entry_t cur;
3228 register vm_map_offset_t prev;
3229
3230 /*
3231 * Basic sanity checks first
3232 */
3233 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
3234 return (FALSE);
3235
3236 /*
3237 * Check first if the region starts within a valid
3238 * mapping for the map.
3239 */
3240 if (!vm_map_lookup_entry(map, start, &cur))
3241 return (FALSE);
3242
3243 /*
3244 * Optimize for the case that the region is contained
3245 * in a single map entry.
3246 */
3247 if (entry != (vm_map_entry_t *) NULL)
3248 *entry = cur;
3249 if (end <= cur->vme_end)
3250 return (TRUE);
3251
3252 /*
3253 * If the region is not wholly contained within a
3254 * single entry, walk the entries looking for holes.
3255 */
3256 prev = cur->vme_end;
3257 cur = cur->vme_next;
3258 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
3259 if (end <= cur->vme_end)
3260 return (TRUE);
3261 prev = cur->vme_end;
3262 cur = cur->vme_next;
3263 }
3264 return (FALSE);
3265 }
3266
3267 /*
3268 * vm_map_submap: [ kernel use only ]
3269 *
3270 * Mark the given range as handled by a subordinate map.
3271 *
3272 * This range must have been created with vm_map_find using
3273 * the vm_submap_object, and no other operations may have been
3274 * performed on this range prior to calling vm_map_submap.
3275 *
3276 * Only a limited number of operations can be performed
3277 * within this range after calling vm_map_submap:
3278 * vm_fault
3279 * [Don't try vm_map_copyin!]
3280 *
3281 * To remove a submapping, one must first remove the
3282 * range from the superior map, and then destroy the
3283 * submap (if desired). [Better yet, don't try it.]
3284 */
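/*
 * Concretely, the [start, end) range must be covered by a single map entry
 * that still maps "vm_submap_object" untouched: no resident pages, no copy
 * or shadow object, and no pager created.  If those conditions aren't met,
 * the call fails with KERN_INVALID_ARGUMENT.
 */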
3285 kern_return_t
3286 vm_map_submap(
3287 vm_map_t map,
3288 vm_map_offset_t start,
3289 vm_map_offset_t end,
3290 vm_map_t submap,
3291 vm_map_offset_t offset,
3292 #ifdef NO_NESTED_PMAP
3293 __unused
3294 #endif /* NO_NESTED_PMAP */
3295 boolean_t use_pmap)
3296 {
3297 vm_map_entry_t entry;
3298 register kern_return_t result = KERN_INVALID_ARGUMENT;
3299 register vm_object_t object;
3300
3301 vm_map_lock(map);
3302
3303 if (! vm_map_lookup_entry(map, start, &entry)) {
3304 entry = entry->vme_next;
3305 }
3306
3307 if (entry == vm_map_to_entry(map) ||
3308 entry->is_sub_map) {
3309 vm_map_unlock(map);
3310 return KERN_INVALID_ARGUMENT;
3311 }
3312
3313 assert(!entry->use_pmap); /* we don't want to unnest anything here */
3314 vm_map_clip_start(map, entry, start);
3315 vm_map_clip_end(map, entry, end);
3316
3317 if ((entry->vme_start == start) && (entry->vme_end == end) &&
3318 (!entry->is_sub_map) &&
3319 ((object = entry->object.vm_object) == vm_submap_object) &&
3320 (object->resident_page_count == 0) &&
3321 (object->copy == VM_OBJECT_NULL) &&
3322 (object->shadow == VM_OBJECT_NULL) &&
3323 (!object->pager_created)) {
3324 entry->offset = (vm_object_offset_t)offset;
3325 entry->object.vm_object = VM_OBJECT_NULL;
3326 vm_object_deallocate(object);
3327 entry->is_sub_map = TRUE;
3328 entry->object.sub_map = submap;
3329 vm_map_reference(submap);
3330 submap->mapped = TRUE;
3331
3332 #ifndef NO_NESTED_PMAP
3333 if (use_pmap) {
3334 /* nest if platform code will allow */
3335 if(submap->pmap == NULL) {
3336 submap->pmap = pmap_create((vm_map_size_t) 0, FALSE);
3337 if(submap->pmap == PMAP_NULL) {
3338 vm_map_unlock(map);
3339 return(KERN_NO_SPACE);
3340 }
3341 }
3342 result = pmap_nest(map->pmap,
3343 (entry->object.sub_map)->pmap,
3344 (addr64_t)start,
3345 (addr64_t)start,
3346 (uint64_t)(end - start));
3347 if(result)
3348 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
3349 entry->use_pmap = TRUE;
3350 }
3351 #else /* NO_NESTED_PMAP */
3352 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
3353 #endif /* NO_NESTED_PMAP */
3354 result = KERN_SUCCESS;
3355 }
3356 vm_map_unlock(map);
3357
3358 return(result);
3359 }
3360
3361 /*
3362 * vm_map_protect:
3363 *
3364 * Sets the protection of the specified address
3365 * region in the target map. If "set_max" is
3366 * specified, the maximum protection is to be set;
3367 * otherwise, only the current protection is affected.
3368 */
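/*
 * Illustrative sketch only (not from the original source): to drop write
 * permission on an existing mapping, a caller might do
 *
 *	kr = vm_map_protect(map, start, end, VM_PROT_READ, FALSE);
 *
 * and, with set_max == TRUE, the same call would clamp the maximum
 * protection instead of the current one.
 */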
3369 kern_return_t
3370 vm_map_protect(
3371 register vm_map_t map,
3372 register vm_map_offset_t start,
3373 register vm_map_offset_t end,
3374 register vm_prot_t new_prot,
3375 register boolean_t set_max)
3376 {
3377 register vm_map_entry_t current;
3378 register vm_map_offset_t prev;
3379 vm_map_entry_t entry;
3380 vm_prot_t new_max;
3381
3382 XPR(XPR_VM_MAP,
3383 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
3384 map, start, end, new_prot, set_max);
3385
3386 vm_map_lock(map);
3387
3388 /* LP64todo - remove this check when vm_map_commpage64()
3389 * no longer has to stuff in a map_entry for the commpage
3390 * above the map's max_offset.
3391 */
3392 if (start >= map->max_offset) {
3393 vm_map_unlock(map);
3394 return(KERN_INVALID_ADDRESS);
3395 }
3396
3397 while(1) {
3398 /*
3399 * Lookup the entry. If it doesn't start in a valid
3400 * entry, return an error.
3401 */
3402 if (! vm_map_lookup_entry(map, start, &entry)) {
3403 vm_map_unlock(map);
3404 return(KERN_INVALID_ADDRESS);
3405 }
3406
3407 if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
3408 start = SUPERPAGE_ROUND_DOWN(start);
3409 continue;
3410 }
3411 break;
3412 }
3413 if (entry->superpage_size)
3414 end = SUPERPAGE_ROUND_UP(end);
3415
3416 /*
3417 * Make a first pass to check for protection and address
3418 * violations.
3419 */
3420
3421 current = entry;
3422 prev = current->vme_start;
3423 while ((current != vm_map_to_entry(map)) &&
3424 (current->vme_start < end)) {
3425
3426 /*
3427 * If there is a hole, return an error.
3428 */
3429 if (current->vme_start != prev) {
3430 vm_map_unlock(map);
3431 return(KERN_INVALID_ADDRESS);
3432 }
3433
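/*
 * VM_PROT_COPY asks for a private (copy-on-write) copy of the mapping, so
 * write access is tolerated in the check below even if the current maximum
 * protection doesn't include it; the second pass will mark the entry
 * needs_copy and add write to its max_protection.
 */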
3434 new_max = current->max_protection;
3435 if(new_prot & VM_PROT_COPY) {
3436 new_max |= VM_PROT_WRITE;
3437 if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
3438 vm_map_unlock(map);
3439 return(KERN_PROTECTION_FAILURE);
3440 }
3441 } else {
3442 if ((new_prot & new_max) != new_prot) {
3443 vm_map_unlock(map);
3444 return(KERN_PROTECTION_FAILURE);
3445 }
3446 }
3447
3448 #if CONFIG_EMBEDDED
3449 if (new_prot & VM_PROT_WRITE) {
3450 if ((new_prot & VM_PROT_EXECUTE) && !(current->used_for_jit)) {
3451 printf("EMBEDDED: %s can't have both write and exec at the same time\n", __FUNCTION__);
3452 new_prot &= ~VM_PROT_EXECUTE;
3453 }
3454 }
3455 #endif
3456
3457 prev = current->vme_end;
3458 current = current->vme_next;
3459 }
3460 if (end > prev) {
3461 vm_map_unlock(map);
3462 return(KERN_INVALID_ADDRESS);
3463 }
3464
3465 /*
3466 * Go back and fix up protections.
3467 * Clip to start here if the range starts within
3468 * the entry.
3469 */
3470
3471 current = entry;
3472 if (current != vm_map_to_entry(map)) {
3473 /* clip and unnest if necessary */
3474 vm_map_clip_start(map, current, start);
3475 }
3476
3477 while ((current != vm_map_to_entry(map)) &&
3478 (current->vme_start < end)) {
3479
3480 vm_prot_t old_prot;
3481
3482 vm_map_clip_end(map, current, end);
3483
3484 assert(!current->use_pmap); /* clipping did unnest if needed */
3485
3486 old_prot = current->protection;
3487
3488 if(new_prot & VM_PROT_COPY) {
3489 /* The caller is asking specifically to copy the */
3490 /* mapped data; this implies that max protection */
3491 /* will include write. The caller must be prepared */
3492 /* for loss of shared memory communication in the */
3493 /* target area after taking this step. */
3494
3495 if (current->is_sub_map == FALSE && current->object.vm_object == VM_OBJECT_NULL){
3496 current->object.vm_object = vm_object_allocate((vm_map_size_t)(current->vme_end - current->vme_start));
3497 current->offset = 0;
3498 }
3499 current->needs_copy = TRUE;
3500 current->max_protection |= VM_PROT_WRITE;
3501 }
3502
3503 if (set_max)
3504 current->protection =
3505 (current->max_protection =
3506 new_prot & ~VM_PROT_COPY) &
3507 old_prot;
3508 else
3509 current->protection = new_prot & ~VM_PROT_COPY;
3510
3511 /*
3512 * Update physical map if necessary.
3513 * If the request is to turn off write protection,
3514 * we won't do it for real (in pmap). This is because
3515 * it would cause copy-on-write to fail. We've already
3516 * set the new protection in the map, so if a
3517 * write-protect fault occurs, it will be fixed up
3518 * properly, COW or not.
3519 */
3520 if (current->protection != old_prot) {
3521 /* Look one level in, since we support nested pmaps */
3522 /* from mapped submaps which are direct entries */
3523 /* in our map */
3524
3525 vm_prot_t prot;
3526
3527 prot = current->protection & ~VM_PROT_WRITE;
3528
3529 if (override_nx(map, current->alias) && prot)
3530 prot |= VM_PROT_EXECUTE;
3531
3532 if (current->is_sub_map && current->use_pmap) {
3533 pmap_protect(current->object.sub_map->pmap,
3534 current->vme_start,
3535 current->vme_end,
3536 prot);
3537 } else {
3538 pmap_protect(map->pmap,
3539 current->vme_start,
3540 current->vme_end,
3541 prot);
3542 }
3543 }
3544 current = current->vme_next;
3545 }
3546
3547 current = entry;
3548 while ((current != vm_map_to_entry(map)) &&
3549 (current->vme_start <= end)) {
3550 vm_map_simplify_entry(map, current);
3551 current = current->vme_next;
3552 }
3553
3554 vm_map_unlock(map);
3555 return(KERN_SUCCESS);
3556 }
3557
3558 /*
3559 * vm_map_inherit:
3560 *
3561 * Sets the inheritance of the specified address
3562 * range in the target map. Inheritance
3563 * affects how the map will be shared with
3564 * child maps at the time of vm_map_fork.
3565 */
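/*
 * Illustrative sketch only (not from the original source): to keep a
 * region out of child tasks created at fork time, a caller might use
 *
 *	kr = vm_map_inherit(map, start, end, VM_INHERIT_NONE);
 */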
3566 kern_return_t
3567 vm_map_inherit(
3568 register vm_map_t map,
3569 register vm_map_offset_t start,
3570 register vm_map_offset_t end,
3571 register vm_inherit_t new_inheritance)
3572 {
3573 register vm_map_entry_t entry;
3574 vm_map_entry_t temp_entry;
3575
3576 vm_map_lock(map);
3577
3578 VM_MAP_RANGE_CHECK(map, start, end);
3579
3580 if (vm_map_lookup_entry(map, start, &temp_entry)) {
3581 entry = temp_entry;
3582 }
3583 else {
3584 temp_entry = temp_entry->vme_next;
3585 entry = temp_entry;
3586 }
3587
3588 /* first check entire range for submaps which can't support the */
3589 /* given inheritance. */
3590 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3591 if(entry->is_sub_map) {
3592 if(new_inheritance == VM_INHERIT_COPY) {
3593 vm_map_unlock(map);
3594 return(KERN_INVALID_ARGUMENT);
3595 }
3596 }
3597
3598 entry = entry->vme_next;
3599 }
3600
3601 entry = temp_entry;
3602 if (entry != vm_map_to_entry(map)) {
3603 /* clip and unnest if necessary */
3604 vm_map_clip_start(map, entry, start);
3605 }
3606
3607 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3608 vm_map_clip_end(map, entry, end);
3609 assert(!entry->use_pmap); /* clip did unnest if needed */
3610
3611 entry->inheritance = new_inheritance;
3612
3613 entry = entry->vme_next;
3614 }
3615
3616 vm_map_unlock(map);
3617 return(KERN_SUCCESS);
3618 }
3619
3620 /*
3621 * Update the accounting for the amount of wired memory in this map. If the user has
3622 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
3623 */
3624
3625 static kern_return_t
3626 add_wire_counts(
3627 vm_map_t map,
3628 vm_map_entry_t entry,
3629 boolean_t user_wire)
3630 {
3631 vm_map_size_t size;
3632
3633 if (user_wire) {
3634 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
3635
3636 /*
3637 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
3638 * this map entry.
3639 */
3640
3641 if (entry->user_wired_count == 0) {
3642 size = entry->vme_end - entry->vme_start;
3643
3644 /*
3645 * Since this is the first time the user is wiring this map entry, check to see if we're
3646 * exceeding the user wire limits. There is a per map limit which is the smaller of either
3647 * the process's rlimit or the global vm_user_wire_limit which caps this value. There is also
3648 * a system-wide limit on the amount of memory all users can wire. If the user is over either
3649 * limit, then we fail.
3650 */
3651
3652 if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
3653 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
3654 size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
3655 return KERN_RESOURCE_SHORTAGE;
3656
3657 /*
3658 * The first time the user wires an entry, we also increment the wired_count and add this to
3659 * the total that has been wired in the map.
3660 */
3661
3662 if (entry->wired_count >= MAX_WIRE_COUNT)
3663 return KERN_FAILURE;
3664
3665 entry->wired_count++;
3666 map->user_wire_size += size;
3667 }
3668
3669 if (entry->user_wired_count >= MAX_WIRE_COUNT)
3670 return KERN_FAILURE;
3671
3672 entry->user_wired_count++;
3673
3674 } else {
3675
3676 /*
3677 * The kernel's wiring the memory. Just bump the count and continue.
3678 */
3679
3680 if (entry->wired_count >= MAX_WIRE_COUNT)
3681 panic("vm_map_wire: too many wirings");
3682
3683 entry->wired_count++;
3684 }
3685
3686 return KERN_SUCCESS;
3687 }
3688
3689 /*
3690 * Update the memory wiring accounting now that the given map entry is being unwired.
3691 */
3692
3693 static void
3694 subtract_wire_counts(
3695 vm_map_t map,
3696 vm_map_entry_t entry,
3697 boolean_t user_wire)
3698 {
3699
3700 if (user_wire) {
3701
3702 /*
3703 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
3704 */
3705
3706 if (entry->user_wired_count == 1) {
3707
3708 /*
3709 * We're removing the last user wire reference. Decrement the wired_count and the total
3710 * user wired memory for this map.
3711 */
3712
3713 assert(entry->wired_count >= 1);
3714 entry->wired_count--;
3715 map->user_wire_size -= entry->vme_end - entry->vme_start;
3716 }
3717
3718 assert(entry->user_wired_count >= 1);
3719 entry->user_wired_count--;
3720
3721 } else {
3722
3723 /*
3724 * The kernel is unwiring the memory. Just update the count.
3725 */
3726
3727 assert(entry->wired_count >= 1);
3728 entry->wired_count--;
3729 }
3730 }
3731
3732 /*
3733 * vm_map_wire:
3734 *
3735 * Sets the pageability of the specified address range in the
3736 * target map as wired. Regions specified as not pageable require
3737 * locked-down physical memory and physical page maps. The
3738 * access_type variable indicates types of accesses that must not
3739 * generate page faults. This is checked against protection of
3740 * memory being locked-down.
3741 *
3742 * The map must not be locked, but a reference must remain to the
3743 * map throughout the call.
3744 */
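/*
 * vm_map_wire_nested is the internal worker.  A NULL "map_pmap" means we
 * are wiring "map" itself; a non-NULL "map_pmap" means we have recursed
 * into a submap and the physical mappings should be entered in that pmap,
 * at addresses based at "pmap_addr".
 */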
3745 static kern_return_t
3746 vm_map_wire_nested(
3747 register vm_map_t map,
3748 register vm_map_offset_t start,
3749 register vm_map_offset_t end,
3750 register vm_prot_t access_type,
3751 boolean_t user_wire,
3752 pmap_t map_pmap,
3753 vm_map_offset_t pmap_addr)
3754 {
3755 register vm_map_entry_t entry;
3756 struct vm_map_entry *first_entry, tmp_entry;
3757 vm_map_t real_map;
3758 register vm_map_offset_t s,e;
3759 kern_return_t rc;
3760 boolean_t need_wakeup;
3761 boolean_t main_map = FALSE;
3762 wait_interrupt_t interruptible_state;
3763 thread_t cur_thread;
3764 unsigned int last_timestamp;
3765 vm_map_size_t size;
3766
3767 vm_map_lock(map);
3768 if(map_pmap == NULL)
3769 main_map = TRUE;
3770 last_timestamp = map->timestamp;
3771
3772 VM_MAP_RANGE_CHECK(map, start, end);
3773 assert(page_aligned(start));
3774 assert(page_aligned(end));
3775 if (start == end) {
3776 /* We wired what the caller asked for, zero pages */
3777 vm_map_unlock(map);
3778 return KERN_SUCCESS;
3779 }
3780
3781 need_wakeup = FALSE;
3782 cur_thread = current_thread();
3783
3784 s = start;
3785 rc = KERN_SUCCESS;
3786
3787 if (vm_map_lookup_entry(map, s, &first_entry)) {
3788 entry = first_entry;
3789 /*
3790 * vm_map_clip_start will be done later.
3791 * We don't want to unnest any nested submaps here !
3792 */
3793 } else {
3794 /* Start address is not in map */
3795 rc = KERN_INVALID_ADDRESS;
3796 goto done;
3797 }
3798
3799 while ((entry != vm_map_to_entry(map)) && (s < end)) {
3800 /*
3801 * At this point, we have wired from "start" to "s".
3802 * We still need to wire from "s" to "end".
3803 *
3804 * "entry" hasn't been clipped, so it could start before "s"
3805 * and/or end after "end".
3806 */
3807
3808 /* "e" is how far we want to wire in this entry */
3809 e = entry->vme_end;
3810 if (e > end)
3811 e = end;
3812
3813 /*
3814 * If another thread is wiring/unwiring this entry then
3815 * block after informing other thread to wake us up.
3816 */
3817 if (entry->in_transition) {
3818 wait_result_t wait_result;
3819
3820 /*
3821 * We have not clipped the entry. Make sure that
3822 * the start address is in range so that the lookup
3823 * below will succeed.
3824 * "s" is the current starting point: we've already
3825 * wired from "start" to "s" and we still have
3826 * to wire from "s" to "end".
3827 */
3828
3829 entry->needs_wakeup = TRUE;
3830
3831 /*
3832 * wake up anybody waiting on entries that we have
3833 * already wired.
3834 */
3835 if (need_wakeup) {
3836 vm_map_entry_wakeup(map);
3837 need_wakeup = FALSE;
3838 }
3839 /*
3840 * User wiring is interruptible
3841 */
3842 wait_result = vm_map_entry_wait(map,
3843 (user_wire) ? THREAD_ABORTSAFE :
3844 THREAD_UNINT);
3845 if (user_wire && wait_result == THREAD_INTERRUPTED) {
3846 /*
3847 * undo the wirings we have done so far
3848 * We do not clear the needs_wakeup flag,
3849 * because we cannot tell if we were the
3850 * only one waiting.
3851 */
3852 rc = KERN_FAILURE;
3853 goto done;
3854 }
3855
3856 /*
3857 * Cannot avoid a lookup here. Reset the timestamp.
3858 */
3859 last_timestamp = map->timestamp;
3860
3861 /*
3862 * The entry could have been clipped, look it up again.
3863 * The worst that can happen is that it may not exist anymore.
3864 */
3865 if (!vm_map_lookup_entry(map, s, &first_entry)) {
3866 /*
3867 * User: undo everything up to the previous
3868 * entry. Let vm_map_unwire worry about
3869 * checking the validity of the range.
3870 */
3871 rc = KERN_FAILURE;
3872 goto done;
3873 }
3874 entry = first_entry;
3875 continue;
3876 }
3877
3878 if (entry->is_sub_map) {
3879 vm_map_offset_t sub_start;
3880 vm_map_offset_t sub_end;
3881 vm_map_offset_t local_start;
3882 vm_map_offset_t local_end;
3883 pmap_t pmap;
3884
3885 vm_map_clip_start(map, entry, s);
3886 vm_map_clip_end(map, entry, end);
3887
3888 sub_start = entry->offset;
3889 sub_end = entry->vme_end;
3890 sub_end += entry->offset - entry->vme_start;
3891
3892 local_end = entry->vme_end;
3893 if(map_pmap == NULL) {
3894 vm_object_t object;
3895 vm_object_offset_t offset;
3896 vm_prot_t prot;
3897 boolean_t wired;
3898 vm_map_entry_t local_entry;
3899 vm_map_version_t version;
3900 vm_map_t lookup_map;
3901
3902 if(entry->use_pmap) {
3903 pmap = entry->object.sub_map->pmap;
3904 /* ppc implementation requires that */
3905 /* submaps pmap address ranges line */
3906 /* up with parent map */
3907 #ifdef notdef
3908 pmap_addr = sub_start;
3909 #endif
3910 pmap_addr = s;
3911 } else {
3912 pmap = map->pmap;
3913 pmap_addr = s;
3914 }
3915
3916 if (entry->wired_count) {
3917 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
3918 goto done;
3919
3920 /*
3921 * The map was not unlocked:
3922 * no need to goto re-lookup.
3923 * Just go directly to next entry.
3924 */
3925 entry = entry->vme_next;
3926 s = entry->vme_start;
3927 continue;
3928
3929 }
3930
3931 /* call vm_map_lookup_locked to */
3932 /* cause any needs copy to be */
3933 /* evaluated */
3934 local_start = entry->vme_start;
3935 lookup_map = map;
3936 vm_map_lock_write_to_read(map);
3937 if(vm_map_lookup_locked(
3938 &lookup_map, local_start,
3939 access_type,
3940 OBJECT_LOCK_EXCLUSIVE,
3941 &version, &object,
3942 &offset, &prot, &wired,
3943 NULL,
3944 &real_map)) {
3945
3946 vm_map_unlock_read(lookup_map);
3947 vm_map_unwire(map, start,
3948 s, user_wire);
3949 return(KERN_FAILURE);
3950 }
3951 if(real_map != lookup_map)
3952 vm_map_unlock(real_map);
3953 vm_map_unlock_read(lookup_map);
3954 vm_map_lock(map);
3955 vm_object_unlock(object);
3956
3957 /* we unlocked, so must re-lookup */
3958 if (!vm_map_lookup_entry(map,
3959 local_start,
3960 &local_entry)) {
3961 rc = KERN_FAILURE;
3962 goto done;
3963 }
3964
3965 /*
3966 * entry could have been "simplified",
3967 * so re-clip
3968 */
3969 entry = local_entry;
3970 assert(s == local_start);
3971 vm_map_clip_start(map, entry, s);
3972 vm_map_clip_end(map, entry, end);
3973 /* re-compute "e" */
3974 e = entry->vme_end;
3975 if (e > end)
3976 e = end;
3977
3978 /* did we have a change of type? */
3979 if (!entry->is_sub_map) {
3980 last_timestamp = map->timestamp;
3981 continue;
3982 }
3983 } else {
3984 local_start = entry->vme_start;
3985 pmap = map_pmap;
3986 }
3987
3988 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
3989 goto done;
3990
3991 entry->in_transition = TRUE;
3992
3993 vm_map_unlock(map);
3994 rc = vm_map_wire_nested(entry->object.sub_map,
3995 sub_start, sub_end,
3996 access_type,
3997 user_wire, pmap, pmap_addr);
3998 vm_map_lock(map);
3999
4000 /*
4001 * Find the entry again. It could have been clipped
4002 * after we unlocked the map.
4003 */
4004 if (!vm_map_lookup_entry(map, local_start,
4005 &first_entry))
4006 panic("vm_map_wire: re-lookup failed");
4007 entry = first_entry;
4008
4009 assert(local_start == s);
4010 /* re-compute "e" */
4011 e = entry->vme_end;
4012 if (e > end)
4013 e = end;
4014
4015 last_timestamp = map->timestamp;
4016 while ((entry != vm_map_to_entry(map)) &&
4017 (entry->vme_start < e)) {
4018 assert(entry->in_transition);
4019 entry->in_transition = FALSE;
4020 if (entry->needs_wakeup) {
4021 entry->needs_wakeup = FALSE;
4022 need_wakeup = TRUE;
4023 }
4024 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
4025 subtract_wire_counts(map, entry, user_wire);
4026 }
4027 entry = entry->vme_next;
4028 }
4029 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
4030 goto done;
4031 }
4032
4033 /* no need to relookup again */
4034 s = entry->vme_start;
4035 continue;
4036 }
4037
4038 /*
4039 * If this entry is already wired then increment
4040 * the appropriate wire reference count.
4041 */
4042 if (entry->wired_count) {
4043 /*
4044 * entry is already wired down, get our reference
4045 * after clipping to our range.
4046 */
4047 vm_map_clip_start(map, entry, s);
4048 vm_map_clip_end(map, entry, end);
4049
4050 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4051 goto done;
4052
4053 /* map was not unlocked: no need to relookup */
4054 entry = entry->vme_next;
4055 s = entry->vme_start;
4056 continue;
4057 }
4058
4059 /*
4060 * Unwired entry or wire request transmitted via submap
4061 */
4062
4063
4064 /*
4065 * Perform actions of vm_map_lookup that need the write
4066 * lock on the map: create a shadow object for a
4067 * copy-on-write region, or an object for a zero-fill
4068 * region.
4069 */
4070 size = entry->vme_end - entry->vme_start;
4071 /*
4072 * If wiring a copy-on-write page, we need to copy it now
4073 * even if we're only (currently) requesting read access.
4074 * This is aggressive, but once it's wired we can't move it.
4075 */
4076 if (entry->needs_copy) {
4077 vm_object_shadow(&entry->object.vm_object,
4078 &entry->offset, size);
4079 entry->needs_copy = FALSE;
4080 } else if (entry->object.vm_object == VM_OBJECT_NULL) {
4081 entry->object.vm_object = vm_object_allocate(size);
4082 entry->offset = (vm_object_offset_t)0;
4083 }
4084
4085 vm_map_clip_start(map, entry, s);
4086 vm_map_clip_end(map, entry, end);
4087
4088 /* re-compute "e" */
4089 e = entry->vme_end;
4090 if (e > end)
4091 e = end;
4092
4093 /*
4094 * Check for holes and protection mismatch.
4095 * Holes: Next entry should be contiguous unless this
4096 * is the end of the region.
4097 * Protection: Access requested must be allowed, unless
4098 * wiring is by protection class
4099 */
4100 if ((entry->vme_end < end) &&
4101 ((entry->vme_next == vm_map_to_entry(map)) ||
4102 (entry->vme_next->vme_start > entry->vme_end))) {
4103 /* found a hole */
4104 rc = KERN_INVALID_ADDRESS;
4105 goto done;
4106 }
4107 if ((entry->protection & access_type) != access_type) {
4108 /* found a protection problem */
4109 rc = KERN_PROTECTION_FAILURE;
4110 goto done;
4111 }
4112
4113 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
4114
4115 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4116 goto done;
4117
4118 entry->in_transition = TRUE;
4119
4120 /*
4121 * This entry might get split once we unlock the map.
4122 * In vm_fault_wire(), we need the current range as
4123 * defined by this entry. In order for this to work
4124 * along with a simultaneous clip operation, we make a
4125 * temporary copy of this entry and use that for the
4126 * wiring. Note that the underlying objects do not
4127 * change during a clip.
4128 */
4129 tmp_entry = *entry;
4130
4131 /*
4132 * The in_transition state guarantees that the entry
4133 * (or entries for this range, if a split occurred) will be
4134 * there when the map lock is acquired for the second time.
4135 */
4136 vm_map_unlock(map);
4137
4138 if (!user_wire && cur_thread != THREAD_NULL)
4139 interruptible_state = thread_interrupt_level(THREAD_UNINT);
4140 else
4141 interruptible_state = THREAD_UNINT;
4142
4143 if(map_pmap)
4144 rc = vm_fault_wire(map,
4145 &tmp_entry, map_pmap, pmap_addr);
4146 else
4147 rc = vm_fault_wire(map,
4148 &tmp_entry, map->pmap,
4149 tmp_entry.vme_start);
4150
4151 if (!user_wire && cur_thread != THREAD_NULL)
4152 thread_interrupt_level(interruptible_state);
4153
4154 vm_map_lock(map);
4155
4156 if (last_timestamp+1 != map->timestamp) {
4157 /*
4158 * Find the entry again. It could have been clipped
4159 * after we unlocked the map.
4160 */
4161 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4162 &first_entry))
4163 panic("vm_map_wire: re-lookup failed");
4164
4165 entry = first_entry;
4166 }
4167
4168 last_timestamp = map->timestamp;
4169
4170 while ((entry != vm_map_to_entry(map)) &&
4171 (entry->vme_start < tmp_entry.vme_end)) {
4172 assert(entry->in_transition);
4173 entry->in_transition = FALSE;
4174 if (entry->needs_wakeup) {
4175 entry->needs_wakeup = FALSE;
4176 need_wakeup = TRUE;
4177 }
4178 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
4179 subtract_wire_counts(map, entry, user_wire);
4180 }
4181 entry = entry->vme_next;
4182 }
4183
4184 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
4185 goto done;
4186 }
4187
4188 s = entry->vme_start;
4189 } /* end while loop through map entries */
4190
4191 done:
4192 if (rc == KERN_SUCCESS) {
4193 /* repair any damage we may have made to the VM map */
4194 vm_map_simplify_range(map, start, end);
4195 }
4196
4197 vm_map_unlock(map);
4198
4199 /*
4200 * wake up anybody waiting on entries we wired.
4201 */
4202 if (need_wakeup)
4203 vm_map_entry_wakeup(map);
4204
4205 if (rc != KERN_SUCCESS) {
4206 /* undo what has been wired so far */
4207 vm_map_unwire(map, start, s, user_wire);
4208 }
4209
4210 return rc;
4211
4212 }
4213
4214 kern_return_t
4215 vm_map_wire(
4216 register vm_map_t map,
4217 register vm_map_offset_t start,
4218 register vm_map_offset_t end,
4219 register vm_prot_t access_type,
4220 boolean_t user_wire)
4221 {
4222
4223 kern_return_t kret;
4224
4225 kret = vm_map_wire_nested(map, start, end, access_type,
4226 user_wire, (pmap_t)NULL, 0);
4227 return kret;
4228 }
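/*
 * Illustrative sketch (hypothetical helper, wrapped in "#if 0" so it is
 * not compiled): how a kernel subsystem might use the wrapper above to
 * pin a page-aligned range of a task's map for the duration of an I/O
 * and then release it.  With user_wire == FALSE the wiring is charged to
 * wired_count rather than user_wired_count.
 */
#if 0	/* example only */
static kern_return_t
example_wire_for_io(
	vm_map_t		map,	/* target map, e.g. current_map() */
	vm_map_offset_t		start,	/* page-aligned start of the buffer */
	vm_map_size_t		size)	/* page-aligned size of the buffer */
{
	kern_return_t		kr;

	/* fault in and wire the pages for read/write access */
	kr = vm_map_wire(map, start, start + size,
			 VM_PROT_READ | VM_PROT_WRITE,
			 FALSE);	/* kernel wiring, not a user wiring */
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... perform the I/O against the now-resident range ... */

	/* drop the kernel wiring; the pages become pageable again */
	return vm_map_unwire(map, start, start + size, FALSE);
}
#endif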
4229
4230 /*
4231 * vm_map_unwire:
4232 *
4233 * Sets the pageability of the specified address range in the target
4234 * map as pageable. Regions specified must have been wired previously.
4235 *
4236 * The map must not be locked, but a reference must remain to the map
4237 * throughout the call.
4238 *
4239 * Kernel will panic on failures. User unwire ignores holes and
4240 * unwired and in-transition entries to avoid losing memory by leaving
4241 * it unwired.
4242 */
4243 static kern_return_t
4244 vm_map_unwire_nested(
4245 register vm_map_t map,
4246 register vm_map_offset_t start,
4247 register vm_map_offset_t end,
4248 boolean_t user_wire,
4249 pmap_t map_pmap,
4250 vm_map_offset_t pmap_addr)
4251 {
4252 register vm_map_entry_t entry;
4253 struct vm_map_entry *first_entry, tmp_entry;
4254 boolean_t need_wakeup;
4255 boolean_t main_map = FALSE;
4256 unsigned int last_timestamp;
4257
4258 vm_map_lock(map);
4259 if(map_pmap == NULL)
4260 main_map = TRUE;
4261 last_timestamp = map->timestamp;
4262
4263 VM_MAP_RANGE_CHECK(map, start, end);
4264 assert(page_aligned(start));
4265 assert(page_aligned(end));
4266
4267 if (start == end) {
4268 /* We unwired what the caller asked for: zero pages */
4269 vm_map_unlock(map);
4270 return KERN_SUCCESS;
4271 }
4272
4273 if (vm_map_lookup_entry(map, start, &first_entry)) {
4274 entry = first_entry;
4275 /*
4276 * vm_map_clip_start will be done later.
4277 * We don't want to unnest any nested sub maps here !
4278 */
4279 }
4280 else {
4281 if (!user_wire) {
4282 panic("vm_map_unwire: start not found");
4283 }
4284 /* Start address is not in map. */
4285 vm_map_unlock(map);
4286 return(KERN_INVALID_ADDRESS);
4287 }
4288
4289 if (entry->superpage_size) {
4290 /* superpages are always wired */
4291 vm_map_unlock(map);
4292 return KERN_INVALID_ADDRESS;
4293 }
4294
4295 need_wakeup = FALSE;
4296 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4297 if (entry->in_transition) {
4298 /*
4299 * 1)
4300 * Another thread is wiring down this entry. Note
4301 * that if it were not for the other thread we would
4302 * be unwiring an unwired entry. This is not
4303 * permitted. If we wait, we will be unwiring memory
4304 * we did not wire.
4305 *
4306 * 2)
4307 * Another thread is unwiring this entry. We did not
4308 * have a reference to it, because if we did, this
4309 * entry will not be getting unwired now.
4310 */
4311 if (!user_wire) {
4312 /*
4313 * XXX FBDP
4314 * This could happen: there could be some
4315 * overlapping vslock/vsunlock operations
4316 * going on.
4317 * We should probably just wait and retry,
4318 * but then we have to be careful that this
4319 * entry could get "simplified" after
4320 * "in_transition" gets unset and before
4321 * we re-lookup the entry, so we would
4322 * have to re-clip the entry to avoid
4323 * re-unwiring what we have already unwired...
4324 * See vm_map_wire_nested().
4325 *
4326 * Or we could just ignore "in_transition"
4327 * here and proceed to decrement the wired
4328 * count(s) on this entry. That should be fine
4329 * as long as "wired_count" doesn't drop all
4330 * the way to 0 (and we should panic if THAT
4331 * happens).
4332 */
4333 panic("vm_map_unwire: in_transition entry");
4334 }
4335
4336 entry = entry->vme_next;
4337 continue;
4338 }
4339
4340 if (entry->is_sub_map) {
4341 vm_map_offset_t sub_start;
4342 vm_map_offset_t sub_end;
4343 vm_map_offset_t local_end;
4344 pmap_t pmap;
4345
4346 vm_map_clip_start(map, entry, start);
4347 vm_map_clip_end(map, entry, end);
4348
4349 sub_start = entry->offset;
4350 sub_end = entry->vme_end - entry->vme_start;
4351 sub_end += entry->offset;
4352 local_end = entry->vme_end;
4353 if(map_pmap == NULL) {
4354 if(entry->use_pmap) {
4355 pmap = entry->object.sub_map->pmap;
4356 pmap_addr = sub_start;
4357 } else {
4358 pmap = map->pmap;
4359 pmap_addr = start;
4360 }
4361 if (entry->wired_count == 0 ||
4362 (user_wire && entry->user_wired_count == 0)) {
4363 if (!user_wire)
4364 panic("vm_map_unwire: entry is unwired");
4365 entry = entry->vme_next;
4366 continue;
4367 }
4368
4369 /*
4370 * Check for holes
4371 * Holes: Next entry should be contiguous unless
4372 * this is the end of the region.
4373 */
4374 if (((entry->vme_end < end) &&
4375 ((entry->vme_next == vm_map_to_entry(map)) ||
4376 (entry->vme_next->vme_start
4377 > entry->vme_end)))) {
4378 if (!user_wire)
4379 panic("vm_map_unwire: non-contiguous region");
4380 /*
4381 entry = entry->vme_next;
4382 continue;
4383 */
4384 }
4385
4386 subtract_wire_counts(map, entry, user_wire);
4387
4388 if (entry->wired_count != 0) {
4389 entry = entry->vme_next;
4390 continue;
4391 }
4392
4393 entry->in_transition = TRUE;
4394 tmp_entry = *entry; /* see comment in vm_map_wire() */
4395
4396 /*
4397 * We can unlock the map now. The in_transition state
4398 * guarantees existence of the entry.
4399 */
4400 vm_map_unlock(map);
4401 vm_map_unwire_nested(entry->object.sub_map,
4402 sub_start, sub_end, user_wire, pmap, pmap_addr);
4403 vm_map_lock(map);
4404
4405 if (last_timestamp+1 != map->timestamp) {
4406 /*
4407 * Find the entry again. It could have been
4408 * clipped or deleted after we unlocked the map.
4409 */
4410 if (!vm_map_lookup_entry(map,
4411 tmp_entry.vme_start,
4412 &first_entry)) {
4413 if (!user_wire)
4414 panic("vm_map_unwire: re-lookup failed");
4415 entry = first_entry->vme_next;
4416 } else
4417 entry = first_entry;
4418 }
4419 last_timestamp = map->timestamp;
4420
4421 /*
4422 * clear transition bit for all constituent entries
4423 * that were in the original entry (saved in
4424 * tmp_entry). Also check for waiters.
4425 */
4426 while ((entry != vm_map_to_entry(map)) &&
4427 (entry->vme_start < tmp_entry.vme_end)) {
4428 assert(entry->in_transition);
4429 entry->in_transition = FALSE;
4430 if (entry->needs_wakeup) {
4431 entry->needs_wakeup = FALSE;
4432 need_wakeup = TRUE;
4433 }
4434 entry = entry->vme_next;
4435 }
4436 continue;
4437 } else {
4438 vm_map_unlock(map);
4439 vm_map_unwire_nested(entry->object.sub_map,
4440 sub_start, sub_end, user_wire, map_pmap,
4441 pmap_addr);
4442 vm_map_lock(map);
4443
4444 if (last_timestamp+1 != map->timestamp) {
4445 /*
4446 * Find the entry again. It could have been
4447 * clipped or deleted after we unlocked the map.
4448 */
4449 if (!vm_map_lookup_entry(map,
4450 tmp_entry.vme_start,
4451 &first_entry)) {
4452 if (!user_wire)
4453 panic("vm_map_unwire: re-lookup failed");
4454 entry = first_entry->vme_next;
4455 } else
4456 entry = first_entry;
4457 }
4458 last_timestamp = map->timestamp;
4459 }
4460 }
4461
4462
4463 if ((entry->wired_count == 0) ||
4464 (user_wire && entry->user_wired_count == 0)) {
4465 if (!user_wire)
4466 panic("vm_map_unwire: entry is unwired");
4467
4468 entry = entry->vme_next;
4469 continue;
4470 }
4471
4472 assert(entry->wired_count > 0 &&
4473 (!user_wire || entry->user_wired_count > 0));
4474
4475 vm_map_clip_start(map, entry, start);
4476 vm_map_clip_end(map, entry, end);
4477
4478 /*
4479 * Check for holes
4480 * Holes: Next entry should be contiguous unless
4481 * this is the end of the region.
4482 */
4483 if (((entry->vme_end < end) &&
4484 ((entry->vme_next == vm_map_to_entry(map)) ||
4485 (entry->vme_next->vme_start > entry->vme_end)))) {
4486
4487 if (!user_wire)
4488 panic("vm_map_unwire: non-contiguous region");
4489 entry = entry->vme_next;
4490 continue;
4491 }
4492
4493 subtract_wire_counts(map, entry, user_wire);
4494
4495 if (entry->wired_count != 0) {
4496 entry = entry->vme_next;
4497 continue;
4498 }
4499
4500 if(entry->zero_wired_pages) {
4501 entry->zero_wired_pages = FALSE;
4502 }
4503
4504 entry->in_transition = TRUE;
4505 tmp_entry = *entry; /* see comment in vm_map_wire() */
4506
4507 /*
4508 * We can unlock the map now. The in_transition state
4509 * guarantees existence of the entry.
4510 */
4511 vm_map_unlock(map);
4512 if(map_pmap) {
4513 vm_fault_unwire(map,
4514 &tmp_entry, FALSE, map_pmap, pmap_addr);
4515 } else {
4516 vm_fault_unwire(map,
4517 &tmp_entry, FALSE, map->pmap,
4518 tmp_entry.vme_start);
4519 }
4520 vm_map_lock(map);
4521
4522 if (last_timestamp+1 != map->timestamp) {
4523 /*
4524 * Find the entry again. It could have been clipped
4525 * or deleted after we unlocked the map.
4526 */
4527 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4528 &first_entry)) {
4529 if (!user_wire)
4530 panic("vm_map_unwire: re-lookup failed");
4531 entry = first_entry->vme_next;
4532 } else
4533 entry = first_entry;
4534 }
4535 last_timestamp = map->timestamp;
4536
4537 /*
4538 * clear transition bit for all constituent entries that
4539 * were in the original entry (saved in tmp_entry). Also
4540 * check for waiters.
4541 */
4542 while ((entry != vm_map_to_entry(map)) &&
4543 (entry->vme_start < tmp_entry.vme_end)) {
4544 assert(entry->in_transition);
4545 entry->in_transition = FALSE;
4546 if (entry->needs_wakeup) {
4547 entry->needs_wakeup = FALSE;
4548 need_wakeup = TRUE;
4549 }
4550 entry = entry->vme_next;
4551 }
4552 }
4553
4554 /*
4555 * We might have fragmented the address space when we wired this
4556 * range of addresses. Attempt to re-coalesce these VM map entries
4557 * with their neighbors now that they're no longer wired.
4558 * Under some circumstances, address space fragmentation can
4559 * prevent VM object shadow chain collapsing, which can cause
4560 * swap space leaks.
4561 */
4562 vm_map_simplify_range(map, start, end);
4563
4564 vm_map_unlock(map);
4565 /*
4566 * wake up anybody waiting on entries that we have unwired.
4567 */
4568 if (need_wakeup)
4569 vm_map_entry_wakeup(map);
4570 return(KERN_SUCCESS);
4571
4572 }
4573
4574 kern_return_t
4575 vm_map_unwire(
4576 register vm_map_t map,
4577 register vm_map_offset_t start,
4578 register vm_map_offset_t end,
4579 boolean_t user_wire)
4580 {
4581 return vm_map_unwire_nested(map, start, end,
4582 user_wire, (pmap_t)NULL, 0);
4583 }
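/*
 * Illustrative sketch (hypothetical helper, not compiled): the user-wiring
 * flavour of the same pair of calls, as an mlock-style request might use
 * them.  With user_wire == TRUE, vm_map_wire_nested() waits THREAD_ABORTSAFE
 * and returns KERN_FAILURE if interrupted, after undoing any partial wiring
 * itself, so the caller only has to propagate the error.
 */
#if 0	/* example only */
static kern_return_t
example_user_lock_range(
	vm_map_t		user_map,
	vm_map_offset_t		addr,
	vm_map_size_t		len)
{
	vm_map_offset_t		start = vm_map_trunc_page(addr);
	vm_map_offset_t		end = vm_map_round_page(addr + len);
	kern_return_t		kr;

	/* bump user_wired_count on every entry in [start, end) */
	kr = vm_map_wire(user_map, start, end, VM_PROT_READ, TRUE);
	if (kr != KERN_SUCCESS)
		return kr;	/* interrupted or bad range; nothing left wired */

	/* ... and the matching unlock request later on ... */
	return vm_map_unwire(user_map, start, end, TRUE);
}
#endif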
4584
4585
4586 /*
4587 * vm_map_entry_delete: [ internal use only ]
4588 *
4589 * Deallocate the given entry from the target map.
4590 */
4591 static void
4592 vm_map_entry_delete(
4593 register vm_map_t map,
4594 register vm_map_entry_t entry)
4595 {
4596 register vm_map_offset_t s, e;
4597 register vm_object_t object;
4598 register vm_map_t submap;
4599
4600 s = entry->vme_start;
4601 e = entry->vme_end;
4602 assert(page_aligned(s));
4603 assert(page_aligned(e));
4604 assert(entry->wired_count == 0);
4605 assert(entry->user_wired_count == 0);
4606 assert(!entry->permanent);
4607
4608 if (entry->is_sub_map) {
4609 object = NULL;
4610 submap = entry->object.sub_map;
4611 } else {
4612 submap = NULL;
4613 object = entry->object.vm_object;
4614 }
4615
4616 vm_map_store_entry_unlink(map, entry);
4617 map->size -= e - s;
4618
4619 vm_map_entry_dispose(map, entry);
4620
4621 vm_map_unlock(map);
4622 /*
4623 * Deallocate the object only after removing all
4624 * pmap entries pointing to its pages.
4625 */
4626 if (submap)
4627 vm_map_deallocate(submap);
4628 else
4629 vm_object_deallocate(object);
4630
4631 }
4632
4633 void
4634 vm_map_submap_pmap_clean(
4635 vm_map_t map,
4636 vm_map_offset_t start,
4637 vm_map_offset_t end,
4638 vm_map_t sub_map,
4639 vm_map_offset_t offset)
4640 {
4641 vm_map_offset_t submap_start;
4642 vm_map_offset_t submap_end;
4643 vm_map_size_t remove_size;
4644 vm_map_entry_t entry;
4645
4646 submap_end = offset + (end - start);
4647 submap_start = offset;
4648
4649 vm_map_lock_read(sub_map);
4650 if(vm_map_lookup_entry(sub_map, offset, &entry)) {
4651
4652 remove_size = (entry->vme_end - entry->vme_start);
4653 if(offset > entry->vme_start)
4654 remove_size -= offset - entry->vme_start;
4655
4656
4657 if(submap_end < entry->vme_end) {
4658 remove_size -=
4659 entry->vme_end - submap_end;
4660 }
4661 if(entry->is_sub_map) {
4662 vm_map_submap_pmap_clean(
4663 sub_map,
4664 start,
4665 start + remove_size,
4666 entry->object.sub_map,
4667 entry->offset);
4668 } else {
4669
4670 if((map->mapped) && (map->ref_count)
4671 && (entry->object.vm_object != NULL)) {
4672 vm_object_pmap_protect(
4673 entry->object.vm_object,
4674 entry->offset+(offset-entry->vme_start),
4675 remove_size,
4676 PMAP_NULL,
4677 entry->vme_start,
4678 VM_PROT_NONE);
4679 } else {
4680 pmap_remove(map->pmap,
4681 (addr64_t)start,
4682 (addr64_t)(start + remove_size));
4683 }
4684 }
4685 }
4686
4687 entry = entry->vme_next;
4688
4689 while((entry != vm_map_to_entry(sub_map))
4690 && (entry->vme_start < submap_end)) {
4691 remove_size = (entry->vme_end - entry->vme_start);
4692 if(submap_end < entry->vme_end) {
4693 remove_size -= entry->vme_end - submap_end;
4694 }
4695 if(entry->is_sub_map) {
4696 vm_map_submap_pmap_clean(
4697 sub_map,
4698 (start + entry->vme_start) - offset,
4699 ((start + entry->vme_start) - offset) + remove_size,
4700 entry->object.sub_map,
4701 entry->offset);
4702 } else {
4703 if((map->mapped) && (map->ref_count)
4704 && (entry->object.vm_object != NULL)) {
4705 vm_object_pmap_protect(
4706 entry->object.vm_object,
4707 entry->offset,
4708 remove_size,
4709 PMAP_NULL,
4710 entry->vme_start,
4711 VM_PROT_NONE);
4712 } else {
4713 pmap_remove(map->pmap,
4714 (addr64_t)((start + entry->vme_start)
4715 - offset),
4716 (addr64_t)(((start + entry->vme_start)
4717 - offset) + remove_size));
4718 }
4719 }
4720 entry = entry->vme_next;
4721 }
4722 vm_map_unlock_read(sub_map);
4723 return;
4724 }
4725
4726 /*
4727 * vm_map_delete: [ internal use only ]
4728 *
4729 * Deallocates the given address range from the target map.
4730 * Removes all user wirings. Unwires one kernel wiring if
4731 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
4732 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
4733 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
4734 *
4735 * This routine is called with map locked and leaves map locked.
4736 */
4737 static kern_return_t
4738 vm_map_delete(
4739 vm_map_t map,
4740 vm_map_offset_t start,
4741 vm_map_offset_t end,
4742 int flags,
4743 vm_map_t zap_map)
4744 {
4745 vm_map_entry_t entry, next;
4746 struct vm_map_entry *first_entry, tmp_entry;
4747 register vm_map_offset_t s;
4748 register vm_object_t object;
4749 boolean_t need_wakeup;
4750 unsigned int last_timestamp = ~0; /* unlikely value */
4751 int interruptible;
4752
4753 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
4754 THREAD_ABORTSAFE : THREAD_UNINT;
4755
4756 /*
4757 * All our DMA I/O operations in IOKit are currently done by
4758 * wiring through the map entries of the task requesting the I/O.
4759 * Because of this, we must always wait for kernel wirings
4760 * to go away on the entries before deleting them.
4761 *
4762 * Any caller who wants to actually remove a kernel wiring
4763 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
4764 * properly remove one wiring instead of blasting through
4765 * them all.
4766 */
4767 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
4768
4769 while(1) {
4770 /*
4771 * Find the start of the region, and clip it
4772 */
4773 if (vm_map_lookup_entry(map, start, &first_entry)) {
4774 entry = first_entry;
4775 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */
4776 start = SUPERPAGE_ROUND_DOWN(start);
4777 continue;
4778 }
4779 if (start == entry->vme_start) {
4780 /*
4781 * No need to clip. We don't want to cause
4782 * any unnecessary unnesting in this case...
4783 */
4784 } else {
4785 vm_map_clip_start(map, entry, start);
4786 }
4787
4788 /*
4789 * Fix the lookup hint now, rather than each
4790 * time through the loop.
4791 */
4792 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4793 } else {
4794 entry = first_entry->vme_next;
4795 }
4796 break;
4797 }
4798 if (entry->superpage_size)
4799 end = SUPERPAGE_ROUND_UP(end);
4800
4801 need_wakeup = FALSE;
4802 /*
4803 * Step through all entries in this region
4804 */
4805 s = entry->vme_start;
4806 while ((entry != vm_map_to_entry(map)) && (s < end)) {
4807 /*
4808 * At this point, we have deleted all the memory entries
4809 * between "start" and "s". We still need to delete
4810 * all memory entries between "s" and "end".
4811 * While we were blocked and the map was unlocked, some
4812 * new memory entries could have been re-allocated between
4813 * "start" and "s" and we don't want to mess with those.
4814 * Some of those entries could even have been re-assembled
4815 * with an entry after "s" (in vm_map_simplify_entry()), so
4816 * we may have to vm_map_clip_start() again.
4817 */
4818
4819 if (entry->vme_start >= s) {
4820 /*
4821 * This entry starts on or after "s"
4822 * so no need to clip its start.
4823 */
4824 } else {
4825 /*
4826 * This entry has been re-assembled by a
4827 * vm_map_simplify_entry(). We need to
4828 * re-clip its start.
4829 */
4830 vm_map_clip_start(map, entry, s);
4831 }
4832 if (entry->vme_end <= end) {
4833 /*
4834 * This entry is going away completely, so no need
4835 * to clip and possibly cause an unnecessary unnesting.
4836 */
4837 } else {
4838 vm_map_clip_end(map, entry, end);
4839 }
4840
4841 if (entry->permanent) {
4842 panic("attempt to remove permanent VM map entry "
4843 "%p [0x%llx:0x%llx]\n",
4844 entry, (uint64_t) s, (uint64_t) end);
4845 }
4846
4847
4848 if (entry->in_transition) {
4849 wait_result_t wait_result;
4850
4851 /*
4852 * Another thread is wiring/unwiring this entry.
4853 * Let the other thread know we are waiting.
4854 */
4855 assert(s == entry->vme_start);
4856 entry->needs_wakeup = TRUE;
4857
4858 /*
4859 * wake up anybody waiting on entries that we have
4860 * already unwired/deleted.
4861 */
4862 if (need_wakeup) {
4863 vm_map_entry_wakeup(map);
4864 need_wakeup = FALSE;
4865 }
4866
4867 wait_result = vm_map_entry_wait(map, interruptible);
4868
4869 if (interruptible &&
4870 wait_result == THREAD_INTERRUPTED) {
4871 /*
4872 * We do not clear the needs_wakeup flag,
4873 * since we cannot tell if we were the only one.
4874 */
4875 vm_map_unlock(map);
4876 return KERN_ABORTED;
4877 }
4878
4879 /*
4880 * The entry could have been clipped or it
4881 * may not exist anymore. Look it up again.
4882 */
4883 if (!vm_map_lookup_entry(map, s, &first_entry)) {
4884 assert((map != kernel_map) &&
4885 (!entry->is_sub_map));
4886 /*
4887 * User: use the next entry
4888 */
4889 entry = first_entry->vme_next;
4890 s = entry->vme_start;
4891 } else {
4892 entry = first_entry;
4893 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4894 }
4895 last_timestamp = map->timestamp;
4896 continue;
4897 } /* end in_transition */
4898
4899 if (entry->wired_count) {
4900 boolean_t user_wire;
4901
4902 user_wire = entry->user_wired_count > 0;
4903
4904 /*
4905 * Remove a kernel wiring if requested
4906 */
4907 if (flags & VM_MAP_REMOVE_KUNWIRE) {
4908 entry->wired_count--;
4909 }
4910
4911 /*
4912 * Remove all user wirings for proper accounting
4913 */
4914 if (entry->user_wired_count > 0) {
4915 while (entry->user_wired_count)
4916 subtract_wire_counts(map, entry, user_wire);
4917 }
4918
4919 if (entry->wired_count != 0) {
4920 assert(map != kernel_map);
4921 /*
4922 * Cannot continue. Typical case is when
4923 * a user thread has physical I/O pending
4924 * on this page. Either wait for the
4925 * kernel wiring to go away or return an
4926 * error.
4927 */
4928 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
4929 wait_result_t wait_result;
4930
4931 assert(s == entry->vme_start);
4932 entry->needs_wakeup = TRUE;
4933 wait_result = vm_map_entry_wait(map,
4934 interruptible);
4935
4936 if (interruptible &&
4937 wait_result == THREAD_INTERRUPTED) {
4938 /*
4939 * We do not clear the
4940 * needs_wakeup flag, since we
4941 * cannot tell if we were the
4942 * only one.
4943 */
4944 vm_map_unlock(map);
4945 return KERN_ABORTED;
4946 }
4947
4948 /*
4949 * The entry could have been clipped or
4950 * it may not exist anymore. Look it
4951 * up again.
4952 */
4953 if (!vm_map_lookup_entry(map, s,
4954 &first_entry)) {
4955 assert(map != kernel_map);
4956 /*
4957 * User: use the next entry
4958 */
4959 entry = first_entry->vme_next;
4960 s = entry->vme_start;
4961 } else {
4962 entry = first_entry;
4963 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4964 }
4965 last_timestamp = map->timestamp;
4966 continue;
4967 }
4968 else {
4969 return KERN_FAILURE;
4970 }
4971 }
4972
4973 entry->in_transition = TRUE;
4974 /*
4975 * copy current entry. see comment in vm_map_wire()
4976 */
4977 tmp_entry = *entry;
4978 assert(s == entry->vme_start);
4979
4980 /*
4981 * We can unlock the map now. The in_transition
4982 * state guarantees existence of the entry.
4983 */
4984 vm_map_unlock(map);
4985
4986 if (tmp_entry.is_sub_map) {
4987 vm_map_t sub_map;
4988 vm_map_offset_t sub_start, sub_end;
4989 pmap_t pmap;
4990 vm_map_offset_t pmap_addr;
4991
4992
4993 sub_map = tmp_entry.object.sub_map;
4994 sub_start = tmp_entry.offset;
4995 sub_end = sub_start + (tmp_entry.vme_end -
4996 tmp_entry.vme_start);
4997 if (tmp_entry.use_pmap) {
4998 pmap = sub_map->pmap;
4999 pmap_addr = tmp_entry.vme_start;
5000 } else {
5001 pmap = map->pmap;
5002 pmap_addr = tmp_entry.vme_start;
5003 }
5004 (void) vm_map_unwire_nested(sub_map,
5005 sub_start, sub_end,
5006 user_wire,
5007 pmap, pmap_addr);
5008 } else {
5009
5010 vm_fault_unwire(map, &tmp_entry,
5011 tmp_entry.object.vm_object == kernel_object,
5012 map->pmap, tmp_entry.vme_start);
5013 }
5014
5015 vm_map_lock(map);
5016
5017 if (last_timestamp+1 != map->timestamp) {
5018 /*
5019 * Find the entry again. It could have
5020 * been clipped after we unlocked the map.
5021 */
5022 if (!vm_map_lookup_entry(map, s, &first_entry)){
5023 assert((map != kernel_map) &&
5024 (!entry->is_sub_map));
5025 first_entry = first_entry->vme_next;
5026 s = first_entry->vme_start;
5027 } else {
5028 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5029 }
5030 } else {
5031 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5032 first_entry = entry;
5033 }
5034
5035 last_timestamp = map->timestamp;
5036
5037 entry = first_entry;
5038 while ((entry != vm_map_to_entry(map)) &&
5039 (entry->vme_start < tmp_entry.vme_end)) {
5040 assert(entry->in_transition);
5041 entry->in_transition = FALSE;
5042 if (entry->needs_wakeup) {
5043 entry->needs_wakeup = FALSE;
5044 need_wakeup = TRUE;
5045 }
5046 entry = entry->vme_next;
5047 }
5048 /*
5049 * We have unwired the entry(s). Go back and
5050 * delete them.
5051 */
5052 entry = first_entry;
5053 continue;
5054 }
5055
5056 /* entry is unwired */
5057 assert(entry->wired_count == 0);
5058 assert(entry->user_wired_count == 0);
5059
5060 assert(s == entry->vme_start);
5061
5062 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
5063 /*
5064 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
5065 * vm_map_delete(), some map entries might have been
5066 * transferred to a "zap_map", which doesn't have a
5067 * pmap. The original pmap has already been flushed
5068 * in the vm_map_delete() call targeting the original
5069 * map, but when we get to destroying the "zap_map",
5070 * we don't have any pmap to flush, so let's just skip
5071 * all this.
5072 */
5073 } else if (entry->is_sub_map) {
5074 if (entry->use_pmap) {
5075 #ifndef NO_NESTED_PMAP
5076 pmap_unnest(map->pmap,
5077 (addr64_t)entry->vme_start,
5078 entry->vme_end - entry->vme_start);
5079 #endif /* NO_NESTED_PMAP */
5080 if ((map->mapped) && (map->ref_count)) {
5081 /* clean up parent map/maps */
5082 vm_map_submap_pmap_clean(
5083 map, entry->vme_start,
5084 entry->vme_end,
5085 entry->object.sub_map,
5086 entry->offset);
5087 }
5088 } else {
5089 vm_map_submap_pmap_clean(
5090 map, entry->vme_start, entry->vme_end,
5091 entry->object.sub_map,
5092 entry->offset);
5093 }
5094 } else if (entry->object.vm_object != kernel_object) {
5095 object = entry->object.vm_object;
5096 if((map->mapped) && (map->ref_count)) {
5097 vm_object_pmap_protect(
5098 object, entry->offset,
5099 entry->vme_end - entry->vme_start,
5100 PMAP_NULL,
5101 entry->vme_start,
5102 VM_PROT_NONE);
5103 } else {
5104 pmap_remove(map->pmap,
5105 (addr64_t)entry->vme_start,
5106 (addr64_t)entry->vme_end);
5107 }
5108 }
5109
5110 /*
5111 * All pmap mappings for this map entry must have been
5112 * cleared by now.
5113 */
5114 assert(vm_map_pmap_is_empty(map,
5115 entry->vme_start,
5116 entry->vme_end));
5117
5118 next = entry->vme_next;
5119 s = next->vme_start;
5120 last_timestamp = map->timestamp;
5121
5122 if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
5123 zap_map != VM_MAP_NULL) {
5124 vm_map_size_t entry_size;
5125 /*
5126 * The caller wants to save the affected VM map entries
5127 * into the "zap_map". The caller will take care of
5128 * these entries.
5129 */
5130 /* unlink the entry from "map" ... */
5131 vm_map_store_entry_unlink(map, entry);
5132 /* ... and add it to the end of the "zap_map" */
5133 vm_map_store_entry_link(zap_map,
5134 vm_map_last_entry(zap_map),
5135 entry);
5136 entry_size = entry->vme_end - entry->vme_start;
5137 map->size -= entry_size;
5138 zap_map->size += entry_size;
5139 /* we didn't unlock the map, so no timestamp increase */
5140 last_timestamp--;
5141 } else {
5142 vm_map_entry_delete(map, entry);
5143 /* vm_map_entry_delete unlocks the map */
5144 vm_map_lock(map);
5145 }
5146
5147 entry = next;
5148
5149 if(entry == vm_map_to_entry(map)) {
5150 break;
5151 }
5152 if (last_timestamp+1 != map->timestamp) {
5153 /*
5154 * We are responsible for deleting everything
5155 * from the given space. If someone has interfered,
5156 * we pick up where we left off; back fills should
5157 * be all right for anyone except map_delete, and
5158 * we have to assume that the task has been fully
5159 * disabled before we get here.
5160 */
5161 if (!vm_map_lookup_entry(map, s, &entry)){
5162 entry = entry->vme_next;
5163 s = entry->vme_start;
5164 } else {
5165 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5166 }
5167 /*
5168 * Others can not only allocate behind us, we can
5169 * also see entries coalesce while we don't hold the map lock.
5170 */
5171 if(entry == vm_map_to_entry(map)) {
5172 break;
5173 }
5174 }
5175 last_timestamp = map->timestamp;
5176 }
5177
5178 if (map->wait_for_space)
5179 thread_wakeup((event_t) map);
5180 /*
5181 * wake up anybody waiting on entries that we have already deleted.
5182 */
5183 if (need_wakeup)
5184 vm_map_entry_wakeup(map);
5185
5186 return KERN_SUCCESS;
5187 }
5188
5189 /*
5190 * vm_map_remove:
5191 *
5192 * Remove the given address range from the target map.
5193 * This is the exported form of vm_map_delete.
5194 */
5195 kern_return_t
5196 vm_map_remove(
5197 register vm_map_t map,
5198 register vm_map_offset_t start,
5199 register vm_map_offset_t end,
5200 register boolean_t flags)
5201 {
5202 register kern_return_t result;
5203
5204 vm_map_lock(map);
5205 VM_MAP_RANGE_CHECK(map, start, end);
5206 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
5207 vm_map_unlock(map);
5208
5209 return(result);
5210 }
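/*
 * Illustrative sketch (hypothetical helper, not compiled): tearing down a
 * mapping that carries one kernel wiring.  Passing VM_MAP_REMOVE_KUNWIRE
 * makes vm_map_delete() drop that single kernel wiring before deleting the
 * entries; without it the delete would simply wait for the kernel wiring to
 * go away, since VM_MAP_REMOVE_WAIT_FOR_KWIRE is always added internally.
 */
#if 0	/* example only */
static kern_return_t
example_remove_wired_mapping(
	vm_map_t		map,
	vm_map_offset_t		start,
	vm_map_size_t		size)
{
	/* remove the range, releasing our single kernel wiring on it */
	return vm_map_remove(map, start, start + size,
			     VM_MAP_REMOVE_KUNWIRE);
}
#endif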
5211
5212
5213 /*
5214 * Routine: vm_map_copy_discard
5215 *
5216 * Description:
5217 * Dispose of a map copy object (returned by
5218 * vm_map_copyin).
5219 */
5220 void
5221 vm_map_copy_discard(
5222 vm_map_copy_t copy)
5223 {
5224 if (copy == VM_MAP_COPY_NULL)
5225 return;
5226
5227 switch (copy->type) {
5228 case VM_MAP_COPY_ENTRY_LIST:
5229 while (vm_map_copy_first_entry(copy) !=
5230 vm_map_copy_to_entry(copy)) {
5231 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
5232
5233 vm_map_copy_entry_unlink(copy, entry);
5234 vm_object_deallocate(entry->object.vm_object);
5235 vm_map_copy_entry_dispose(copy, entry);
5236 }
5237 break;
5238 case VM_MAP_COPY_OBJECT:
5239 vm_object_deallocate(copy->cpy_object);
5240 break;
5241 case VM_MAP_COPY_KERNEL_BUFFER:
5242
5243 /*
5244 * The vm_map_copy_t and possibly the data buffer were
5245 * allocated by a single call to kalloc(), i.e. the
5246 * vm_map_copy_t was not allocated out of the zone.
5247 */
5248 kfree(copy, copy->cpy_kalloc_size);
5249 return;
5250 }
5251 zfree(vm_map_copy_zone, copy);
5252 }
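/*
 * Illustrative sketch (hypothetical helper, not compiled): the usual
 * lifetime of a vm_map_copy_t.  vm_map_copyin() is assumed to have its
 * standard (src_map, src_addr, len, src_destroy, &copy) form.  Because
 * vm_map_copy_overwrite() consumes the copy object only on success, the
 * caller must discard it explicitly on failure.
 */
#if 0	/* example only */
static kern_return_t
example_copy_between_maps(
	vm_map_t		src_map,
	vm_map_offset_t		src_addr,
	vm_map_t		dst_map,
	vm_map_offset_t		dst_addr,
	vm_map_size_t		len)
{
	vm_map_copy_t		copy;
	kern_return_t		kr;

	/* capture the source range in copyin form, leaving the source intact */
	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	/* overwrite the (writeable, already mapped) destination range */
	kr = vm_map_copy_overwrite(dst_map, dst_addr, copy, TRUE);
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(copy);	/* failure: copy not consumed */

	return kr;
}
#endif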
5253
5254 /*
5255 * Routine: vm_map_copy_copy
5256 *
5257 * Description:
5258 * Move the information in a map copy object to
5259 * a new map copy object, leaving the old one
5260 * empty.
5261 *
5262 * This is used by kernel routines that need
5263 * to look at out-of-line data (in copyin form)
5264 * before deciding whether to return SUCCESS.
5265 * If the routine returns FAILURE, the original
5266 * copy object will be deallocated; therefore,
5267 * these routines must make a copy of the copy
5268 * object and leave the original empty so that
5269 * deallocation will not fail.
5270 */
5271 vm_map_copy_t
5272 vm_map_copy_copy(
5273 vm_map_copy_t copy)
5274 {
5275 vm_map_copy_t new_copy;
5276
5277 if (copy == VM_MAP_COPY_NULL)
5278 return VM_MAP_COPY_NULL;
5279
5280 /*
5281 * Allocate a new copy object, and copy the information
5282 * from the old one into it.
5283 */
5284
5285 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
5286 *new_copy = *copy;
5287
5288 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
5289 /*
5290 * The links in the entry chain must be
5291 * changed to point to the new copy object.
5292 */
5293 vm_map_copy_first_entry(copy)->vme_prev
5294 = vm_map_copy_to_entry(new_copy);
5295 vm_map_copy_last_entry(copy)->vme_next
5296 = vm_map_copy_to_entry(new_copy);
5297 }
5298
5299 /*
5300 * Change the old copy object into one that contains
5301 * nothing to be deallocated.
5302 */
5303 copy->type = VM_MAP_COPY_OBJECT;
5304 copy->cpy_object = VM_OBJECT_NULL;
5305
5306 /*
5307 * Return the new object.
5308 */
5309 return new_copy;
5310 }
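/*
 * Illustrative sketch (hypothetical helper, not compiled): the pattern the
 * comment above describes.  A routine that must inspect out-of-line data
 * before committing takes the contents via vm_map_copy_copy(); if it then
 * fails, its caller can safely deallocate the original, now-empty copy
 * object without losing the data.  The validation callback is made up for
 * the example.
 */
#if 0	/* example only */
static kern_return_t
example_consume_ool_data(
	vm_map_copy_t		copy,	/* copyin form, owned by our caller */
	boolean_t		(*looks_valid)(vm_map_copy_t))
{
	vm_map_copy_t		mine;

	/* take the contents; "copy" is left empty for the caller to discard */
	mine = vm_map_copy_copy(copy);

	if (!looks_valid(mine)) {
		vm_map_copy_discard(mine);	/* we own the data now */
		return KERN_INVALID_ARGUMENT;
	}

	/* ... hand "mine" to whatever will map it or overwrite with it ... */
	return KERN_SUCCESS;
}
#endif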
5311
5312 static kern_return_t
5313 vm_map_overwrite_submap_recurse(
5314 vm_map_t dst_map,
5315 vm_map_offset_t dst_addr,
5316 vm_map_size_t dst_size)
5317 {
5318 vm_map_offset_t dst_end;
5319 vm_map_entry_t tmp_entry;
5320 vm_map_entry_t entry;
5321 kern_return_t result;
5322 boolean_t encountered_sub_map = FALSE;
5323
5324
5325
5326 /*
5327 * Verify that the destination is all writeable
5328 * initially. We have to trunc the destination
5329 * address and round the copy size or we'll end up
5330 * splitting entries in strange ways.
5331 */
5332
5333 dst_end = vm_map_round_page(dst_addr + dst_size);
5334 vm_map_lock(dst_map);
5335
5336 start_pass_1:
5337 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5338 vm_map_unlock(dst_map);
5339 return(KERN_INVALID_ADDRESS);
5340 }
5341
5342 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
5343 assert(!tmp_entry->use_pmap); /* clipping did unnest if needed */
5344
5345 for (entry = tmp_entry;;) {
5346 vm_map_entry_t next;
5347
5348 next = entry->vme_next;
5349 while(entry->is_sub_map) {
5350 vm_map_offset_t sub_start;
5351 vm_map_offset_t sub_end;
5352 vm_map_offset_t local_end;
5353
5354 if (entry->in_transition) {
5355 /*
5356 * Say that we are waiting, and wait for entry.
5357 */
5358 entry->needs_wakeup = TRUE;
5359 vm_map_entry_wait(dst_map, THREAD_UNINT);
5360
5361 goto start_pass_1;
5362 }
5363
5364 encountered_sub_map = TRUE;
5365 sub_start = entry->offset;
5366
5367 if(entry->vme_end < dst_end)
5368 sub_end = entry->vme_end;
5369 else
5370 sub_end = dst_end;
5371 sub_end -= entry->vme_start;
5372 sub_end += entry->offset;
5373 local_end = entry->vme_end;
5374 vm_map_unlock(dst_map);
5375
5376 result = vm_map_overwrite_submap_recurse(
5377 entry->object.sub_map,
5378 sub_start,
5379 sub_end - sub_start);
5380
5381 if(result != KERN_SUCCESS)
5382 return result;
5383 if (dst_end <= entry->vme_end)
5384 return KERN_SUCCESS;
5385 vm_map_lock(dst_map);
5386 if(!vm_map_lookup_entry(dst_map, local_end,
5387 &tmp_entry)) {
5388 vm_map_unlock(dst_map);
5389 return(KERN_INVALID_ADDRESS);
5390 }
5391 entry = tmp_entry;
5392 next = entry->vme_next;
5393 }
5394
5395 if ( ! (entry->protection & VM_PROT_WRITE)) {
5396 vm_map_unlock(dst_map);
5397 return(KERN_PROTECTION_FAILURE);
5398 }
5399
5400 /*
5401 * If the entry is in transition, we must wait
5402 * for it to exit that state. Anything could happen
5403 * when we unlock the map, so start over.
5404 */
5405 if (entry->in_transition) {
5406
5407 /*
5408 * Say that we are waiting, and wait for entry.
5409 */
5410 entry->needs_wakeup = TRUE;
5411 vm_map_entry_wait(dst_map, THREAD_UNINT);
5412
5413 goto start_pass_1;
5414 }
5415
5416 /*
5417 * our range is contained completely within this map entry
5418 */
5419 if (dst_end <= entry->vme_end) {
5420 vm_map_unlock(dst_map);
5421 return KERN_SUCCESS;
5422 }
5423 /*
5424 * check that range specified is contiguous region
5425 */
5426 if ((next == vm_map_to_entry(dst_map)) ||
5427 (next->vme_start != entry->vme_end)) {
5428 vm_map_unlock(dst_map);
5429 return(KERN_INVALID_ADDRESS);
5430 }
5431
5432 /*
5433 * Check for permanent objects in the destination.
5434 */
5435 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5436 ((!entry->object.vm_object->internal) ||
5437 (entry->object.vm_object->true_share))) {
5438 if(encountered_sub_map) {
5439 vm_map_unlock(dst_map);
5440 return(KERN_FAILURE);
5441 }
5442 }
5443
5444
5445 entry = next;
5446 }/* for */
5447 vm_map_unlock(dst_map);
5448 return(KERN_SUCCESS);
5449 }
5450
5451 /*
5452 * Routine: vm_map_copy_overwrite
5453 *
5454 * Description:
5455 * Copy the memory described by the map copy
5456 * object (copy; returned by vm_map_copyin) onto
5457 * the specified destination region (dst_map, dst_addr).
5458 * The destination must be writeable.
5459 *
5460 * Unlike vm_map_copyout, this routine actually
5461 * writes over previously-mapped memory. If the
5462 * previous mapping was to a permanent (user-supplied)
5463 * memory object, it is preserved.
5464 *
5465 * The attributes (protection and inheritance) of the
5466 * destination region are preserved.
5467 *
5468 * If successful, consumes the copy object.
5469 * Otherwise, the caller is responsible for it.
5470 *
5471 * Implementation notes:
5472 * To overwrite aligned temporary virtual memory, it is
5473 * sufficient to remove the previous mapping and insert
5474 * the new copy. This replacement is done either on
5475 * the whole region (if no permanent virtual memory
5476 * objects are embedded in the destination region) or
5477 * in individual map entries.
5478 *
5479 * To overwrite permanent virtual memory, it is necessary
5480 * to copy each page, as the external memory management
5481 * interface currently does not provide any optimizations.
5482 *
5483 * Unaligned memory also has to be copied. It is possible
5484 * to use 'vm_trickery' to copy the aligned data. This is
5485 * not done, but would not be hard to implement.
5486 *
5487 * Once a page of permanent memory has been overwritten,
5488 * it is impossible to interrupt this function; otherwise,
5489 * the call would be neither atomic nor location-independent.
5490 * The kernel-state portion of a user thread must be
5491 * interruptible.
5492 *
5493 * It may be expensive to forward all requests that might
5494 * overwrite permanent memory (vm_write, vm_copy) to
5495 * uninterruptible kernel threads. This routine may be
5496 * called by interruptible threads; however, success is
5497 * not guaranteed -- if the request cannot be performed
5498 * atomically and interruptibly, an error indication is
5499 * returned.
5500 */
5501
5502 static kern_return_t
5503 vm_map_copy_overwrite_nested(
5504 vm_map_t dst_map,
5505 vm_map_address_t dst_addr,
5506 vm_map_copy_t copy,
5507 boolean_t interruptible,
5508 pmap_t pmap,
5509 boolean_t discard_on_success)
5510 {
5511 vm_map_offset_t dst_end;
5512 vm_map_entry_t tmp_entry;
5513 vm_map_entry_t entry;
5514 kern_return_t kr;
5515 boolean_t aligned = TRUE;
5516 boolean_t contains_permanent_objects = FALSE;
5517 boolean_t encountered_sub_map = FALSE;
5518 vm_map_offset_t base_addr;
5519 vm_map_size_t copy_size;
5520 vm_map_size_t total_size;
5521
5522
5523 /*
5524 * Check for null copy object.
5525 */
5526
5527 if (copy == VM_MAP_COPY_NULL)
5528 return(KERN_SUCCESS);
5529
5530 /*
5531 * Check for special kernel buffer allocated
5532 * by new_ipc_kmsg_copyin.
5533 */
5534
5535 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
5536 return(vm_map_copyout_kernel_buffer(
5537 dst_map, &dst_addr,
5538 copy, TRUE));
5539 }
5540
5541 /*
5542 * Only works for entry lists at the moment. Will
5543 * support page lists later.
5544 */
5545
5546 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
5547
5548 if (copy->size == 0) {
5549 if (discard_on_success)
5550 vm_map_copy_discard(copy);
5551 return(KERN_SUCCESS);
5552 }
5553
5554 /*
5555 * Verify that the destination is all writeable
5556 * initially. We have to trunc the destination
5557 * address and round the copy size or we'll end up
5558 * splitting entries in strange ways.
5559 */
5560
5561 if (!page_aligned(copy->size) ||
5562 !page_aligned (copy->offset) ||
5563 !page_aligned (dst_addr))
5564 {
5565 aligned = FALSE;
5566 dst_end = vm_map_round_page(dst_addr + copy->size);
5567 } else {
5568 dst_end = dst_addr + copy->size;
5569 }
5570
5571 vm_map_lock(dst_map);
5572
5573 /* LP64todo - remove this check when vm_map_commpage64()
5574 * no longer has to stuff in a map_entry for the commpage
5575 * above the map's max_offset.
5576 */
5577 if (dst_addr >= dst_map->max_offset) {
5578 vm_map_unlock(dst_map);
5579 return(KERN_INVALID_ADDRESS);
5580 }
5581
5582 start_pass_1:
5583 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5584 vm_map_unlock(dst_map);
5585 return(KERN_INVALID_ADDRESS);
5586 }
5587 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
5588 for (entry = tmp_entry;;) {
5589 vm_map_entry_t next = entry->vme_next;
5590
5591 while(entry->is_sub_map) {
5592 vm_map_offset_t sub_start;
5593 vm_map_offset_t sub_end;
5594 vm_map_offset_t local_end;
5595
5596 if (entry->in_transition) {
5597
5598 /*
5599 * Say that we are waiting, and wait for entry.
5600 */
5601 entry->needs_wakeup = TRUE;
5602 vm_map_entry_wait(dst_map, THREAD_UNINT);
5603
5604 goto start_pass_1;
5605 }
5606
5607 local_end = entry->vme_end;
5608 if (!(entry->needs_copy)) {
5609 /* if needs_copy we are a COW submap */
5610 /* in such a case we just replace so */
5611 /* there is no need for the */
5612 /* following check. */
5613 encountered_sub_map = TRUE;
5614 sub_start = entry->offset;
5615
5616 if(entry->vme_end < dst_end)
5617 sub_end = entry->vme_end;
5618 else
5619 sub_end = dst_end;
5620 sub_end -= entry->vme_start;
5621 sub_end += entry->offset;
5622 vm_map_unlock(dst_map);
5623
5624 kr = vm_map_overwrite_submap_recurse(
5625 entry->object.sub_map,
5626 sub_start,
5627 sub_end - sub_start);
5628 if(kr != KERN_SUCCESS)
5629 return kr;
5630 vm_map_lock(dst_map);
5631 }
5632
5633 if (dst_end <= entry->vme_end)
5634 goto start_overwrite;
5635 if(!vm_map_lookup_entry(dst_map, local_end,
5636 &entry)) {
5637 vm_map_unlock(dst_map);
5638 return(KERN_INVALID_ADDRESS);
5639 }
5640 next = entry->vme_next;
5641 }
5642
5643 if ( ! (entry->protection & VM_PROT_WRITE)) {
5644 vm_map_unlock(dst_map);
5645 return(KERN_PROTECTION_FAILURE);
5646 }
5647
5648 /*
5649 * If the entry is in transition, we must wait
5650 * for it to exit that state. Anything could happen
5651 * when we unlock the map, so start over.
5652 */
5653 if (entry->in_transition) {
5654
5655 /*
5656 * Say that we are waiting, and wait for entry.
5657 */
5658 entry->needs_wakeup = TRUE;
5659 vm_map_entry_wait(dst_map, THREAD_UNINT);
5660
5661 goto start_pass_1;
5662 }
5663
5664 /*
5665 * our range is contained completely within this map entry
5666 */
5667 if (dst_end <= entry->vme_end)
5668 break;
5669 /*
5670 * check that range specified is contiguous region
5671 */
5672 if ((next == vm_map_to_entry(dst_map)) ||
5673 (next->vme_start != entry->vme_end)) {
5674 vm_map_unlock(dst_map);
5675 return(KERN_INVALID_ADDRESS);
5676 }
5677
5678
5679 /*
5680 * Check for permanent objects in the destination.
5681 */
5682 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5683 ((!entry->object.vm_object->internal) ||
5684 (entry->object.vm_object->true_share))) {
5685 contains_permanent_objects = TRUE;
5686 }
5687
5688 entry = next;
5689 }/* for */
5690
5691 start_overwrite:
5692 /*
5693 * If there are permanent objects in the destination, then
5694 * the copy cannot be interrupted.
5695 */
5696
5697 if (interruptible && contains_permanent_objects) {
5698 vm_map_unlock(dst_map);
5699 return(KERN_FAILURE); /* XXX */
5700 }
5701
5702 /*
5703 *
5704 * Make a second pass, overwriting the data
5705 * At the beginning of each loop iteration,
5706 * the next entry to be overwritten is "tmp_entry"
5707 * (initially, the value returned from the lookup above),
5708 * and the starting address expected in that entry
5709 * is "start".
5710 */
5711
5712 total_size = copy->size;
5713 if(encountered_sub_map) {
5714 copy_size = 0;
5715 /* re-calculate tmp_entry since we've had the map */
5716 /* unlocked */
5717 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
5718 vm_map_unlock(dst_map);
5719 return(KERN_INVALID_ADDRESS);
5720 }
5721 } else {
5722 copy_size = copy->size;
5723 }
5724
5725 base_addr = dst_addr;
5726 while(TRUE) {
5727 /* deconstruct the copy object and do in parts */
5728 /* only in sub_map, interruptible case */
5729 vm_map_entry_t copy_entry;
5730 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
5731 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
5732 int nentries;
5733 int remaining_entries = 0;
5734 vm_map_offset_t new_offset = 0;
5735
5736 for (entry = tmp_entry; copy_size == 0;) {
5737 vm_map_entry_t next;
5738
5739 next = entry->vme_next;
5740
5741 /* tmp_entry and base address are moved along */
5742 /* each time we encounter a sub-map. Otherwise */
5743 /* entry can outpace tmp_entry, and the copy_size */
5744 /* may reflect the distance between them. */
5745 /* If the current entry is found to be in transition, */
5746 /* we will start over at the beginning or at the last */
5747 /* encounter of a submap, as dictated by base_addr, */
5748 /* and we will zero copy_size accordingly. */
5749 if (entry->in_transition) {
5750 /*
5751 * Say that we are waiting, and wait for entry.
5752 */
5753 entry->needs_wakeup = TRUE;
5754 vm_map_entry_wait(dst_map, THREAD_UNINT);
5755
5756 if(!vm_map_lookup_entry(dst_map, base_addr,
5757 &tmp_entry)) {
5758 vm_map_unlock(dst_map);
5759 return(KERN_INVALID_ADDRESS);
5760 }
5761 copy_size = 0;
5762 entry = tmp_entry;
5763 continue;
5764 }
5765 if(entry->is_sub_map) {
5766 vm_map_offset_t sub_start;
5767 vm_map_offset_t sub_end;
5768 vm_map_offset_t local_end;
5769
5770 if (entry->needs_copy) {
5771 /* if this is a COW submap */
5772 /* just back the range with an */
5773 /* anonymous entry */
5774 if(entry->vme_end < dst_end)
5775 sub_end = entry->vme_end;
5776 else
5777 sub_end = dst_end;
5778 if(entry->vme_start < base_addr)
5779 sub_start = base_addr;
5780 else
5781 sub_start = entry->vme_start;
5782 vm_map_clip_end(
5783 dst_map, entry, sub_end);
5784 vm_map_clip_start(
5785 dst_map, entry, sub_start);
5786 assert(!entry->use_pmap);
5787 entry->is_sub_map = FALSE;
5788 vm_map_deallocate(
5789 entry->object.sub_map);
5790 entry->object.sub_map = NULL;
5791 entry->is_shared = FALSE;
5792 entry->needs_copy = FALSE;
5793 entry->offset = 0;
5794 /*
5795 * XXX FBDP
5796 * We should propagate the protections
5797 * of the submap entry here instead
5798 * of forcing them to VM_PROT_ALL...
5799 * Or better yet, we should inherit
5800 * the protection of the copy_entry.
5801 */
5802 entry->protection = VM_PROT_ALL;
5803 entry->max_protection = VM_PROT_ALL;
5804 entry->wired_count = 0;
5805 entry->user_wired_count = 0;
5806 if(entry->inheritance
5807 == VM_INHERIT_SHARE)
5808 entry->inheritance = VM_INHERIT_COPY;
5809 continue;
5810 }
5811 /* first take care of any non-sub_map */
5812 /* entries to send */
5813 if(base_addr < entry->vme_start) {
5814 /* stuff to send */
5815 copy_size =
5816 entry->vme_start - base_addr;
5817 break;
5818 }
5819 sub_start = entry->offset;
5820
5821 if(entry->vme_end < dst_end)
5822 sub_end = entry->vme_end;
5823 else
5824 sub_end = dst_end;
5825 sub_end -= entry->vme_start;
5826 sub_end += entry->offset;
5827 local_end = entry->vme_end;
5828 vm_map_unlock(dst_map);
5829 copy_size = sub_end - sub_start;
5830
5831 /* adjust the copy object */
5832 if (total_size > copy_size) {
5833 vm_map_size_t local_size = 0;
5834 vm_map_size_t entry_size;
5835
5836 nentries = 1;
5837 new_offset = copy->offset;
5838 copy_entry = vm_map_copy_first_entry(copy);
5839 while(copy_entry !=
5840 vm_map_copy_to_entry(copy)){
5841 entry_size = copy_entry->vme_end -
5842 copy_entry->vme_start;
5843 if((local_size < copy_size) &&
5844 ((local_size + entry_size)
5845 >= copy_size)) {
5846 vm_map_copy_clip_end(copy,
5847 copy_entry,
5848 copy_entry->vme_start +
5849 (copy_size - local_size));
5850 entry_size = copy_entry->vme_end -
5851 copy_entry->vme_start;
5852 local_size += entry_size;
5853 new_offset += entry_size;
5854 }
5855 if(local_size >= copy_size) {
5856 next_copy = copy_entry->vme_next;
5857 copy_entry->vme_next =
5858 vm_map_copy_to_entry(copy);
5859 previous_prev =
5860 copy->cpy_hdr.links.prev;
5861 copy->cpy_hdr.links.prev = copy_entry;
5862 copy->size = copy_size;
5863 remaining_entries =
5864 copy->cpy_hdr.nentries;
5865 remaining_entries -= nentries;
5866 copy->cpy_hdr.nentries = nentries;
5867 break;
5868 } else {
5869 local_size += entry_size;
5870 new_offset += entry_size;
5871 nentries++;
5872 }
5873 copy_entry = copy_entry->vme_next;
5874 }
5875 }
5876
5877 if((entry->use_pmap) && (pmap == NULL)) {
5878 kr = vm_map_copy_overwrite_nested(
5879 entry->object.sub_map,
5880 sub_start,
5881 copy,
5882 interruptible,
5883 entry->object.sub_map->pmap,
5884 TRUE);
5885 } else if (pmap != NULL) {
5886 kr = vm_map_copy_overwrite_nested(
5887 entry->object.sub_map,
5888 sub_start,
5889 copy,
5890 interruptible, pmap,
5891 TRUE);
5892 } else {
5893 kr = vm_map_copy_overwrite_nested(
5894 entry->object.sub_map,
5895 sub_start,
5896 copy,
5897 interruptible,
5898 dst_map->pmap,
5899 TRUE);
5900 }
5901 if(kr != KERN_SUCCESS) {
5902 if(next_copy != NULL) {
5903 copy->cpy_hdr.nentries +=
5904 remaining_entries;
5905 copy->cpy_hdr.links.prev->vme_next =
5906 next_copy;
5907 copy->cpy_hdr.links.prev
5908 = previous_prev;
5909 copy->size = total_size;
5910 }
5911 return kr;
5912 }
5913 if (dst_end <= local_end) {
5914 return(KERN_SUCCESS);
5915 }
5916 /* otherwise copy no longer exists, it was */
5917 /* destroyed after successful copy_overwrite */
5918 copy = (vm_map_copy_t)
5919 zalloc(vm_map_copy_zone);
5920 vm_map_copy_first_entry(copy) =
5921 vm_map_copy_last_entry(copy) =
5922 vm_map_copy_to_entry(copy);
5923 copy->type = VM_MAP_COPY_ENTRY_LIST;
5924 copy->offset = new_offset;
5925
5926 /*
5927 * XXX FBDP
5928 * this does not seem to deal with
5929 * the VM map store (R&B tree)
5930 */
5931
5932 total_size -= copy_size;
5933 copy_size = 0;
5934 /* put back remainder of copy in container */
5935 if(next_copy != NULL) {
5936 copy->cpy_hdr.nentries = remaining_entries;
5937 copy->cpy_hdr.links.next = next_copy;
5938 copy->cpy_hdr.links.prev = previous_prev;
5939 copy->size = total_size;
5940 next_copy->vme_prev =
5941 vm_map_copy_to_entry(copy);
5942 next_copy = NULL;
5943 }
5944 base_addr = local_end;
5945 vm_map_lock(dst_map);
5946 if(!vm_map_lookup_entry(dst_map,
5947 local_end, &tmp_entry)) {
5948 vm_map_unlock(dst_map);
5949 return(KERN_INVALID_ADDRESS);
5950 }
5951 entry = tmp_entry;
5952 continue;
5953 }
5954 if (dst_end <= entry->vme_end) {
5955 copy_size = dst_end - base_addr;
5956 break;
5957 }
5958
5959 if ((next == vm_map_to_entry(dst_map)) ||
5960 (next->vme_start != entry->vme_end)) {
5961 vm_map_unlock(dst_map);
5962 return(KERN_INVALID_ADDRESS);
5963 }
5964
5965 entry = next;
5966 }/* for */
5967
5968 next_copy = NULL;
5969 nentries = 1;
5970
5971 /* adjust the copy object */
5972 if (total_size > copy_size) {
5973 vm_map_size_t local_size = 0;
5974 vm_map_size_t entry_size;
5975
5976 new_offset = copy->offset;
5977 copy_entry = vm_map_copy_first_entry(copy);
5978 while(copy_entry != vm_map_copy_to_entry(copy)) {
5979 entry_size = copy_entry->vme_end -
5980 copy_entry->vme_start;
5981 if((local_size < copy_size) &&
5982 ((local_size + entry_size)
5983 >= copy_size)) {
5984 vm_map_copy_clip_end(copy, copy_entry,
5985 copy_entry->vme_start +
5986 (copy_size - local_size));
5987 entry_size = copy_entry->vme_end -
5988 copy_entry->vme_start;
5989 local_size += entry_size;
5990 new_offset += entry_size;
5991 }
5992 if(local_size >= copy_size) {
5993 next_copy = copy_entry->vme_next;
5994 copy_entry->vme_next =
5995 vm_map_copy_to_entry(copy);
5996 previous_prev =
5997 copy->cpy_hdr.links.prev;
5998 copy->cpy_hdr.links.prev = copy_entry;
5999 copy->size = copy_size;
6000 remaining_entries =
6001 copy->cpy_hdr.nentries;
6002 remaining_entries -= nentries;
6003 copy->cpy_hdr.nentries = nentries;
6004 break;
6005 } else {
6006 local_size += entry_size;
6007 new_offset += entry_size;
6008 nentries++;
6009 }
6010 copy_entry = copy_entry->vme_next;
6011 }
6012 }
6013
6014 if (aligned) {
6015 pmap_t local_pmap;
6016
6017 if(pmap)
6018 local_pmap = pmap;
6019 else
6020 local_pmap = dst_map->pmap;
6021
6022 if ((kr = vm_map_copy_overwrite_aligned(
6023 dst_map, tmp_entry, copy,
6024 base_addr, local_pmap)) != KERN_SUCCESS) {
6025 if(next_copy != NULL) {
6026 copy->cpy_hdr.nentries +=
6027 remaining_entries;
6028 copy->cpy_hdr.links.prev->vme_next =
6029 next_copy;
6030 copy->cpy_hdr.links.prev =
6031 previous_prev;
6032 copy->size += copy_size;
6033 }
6034 return kr;
6035 }
6036 vm_map_unlock(dst_map);
6037 } else {
6038 /*
6039 * Performance gain:
6040 *
6041 * if the copy and dst address are misaligned but share the same
6042 * offset within the page, we can copy the misaligned parts
6043 * unaligned and copy the rest aligned. If they are
6044 * aligned but len is unaligned, we simply need to copy
6045 * the end bit unaligned. We'll need to split off the misaligned
6046 * bits of the region in this case!
6047 */
6048 /* ALWAYS UNLOCKS THE dst_map MAP */
6049 if ((kr = vm_map_copy_overwrite_unaligned( dst_map,
6050 tmp_entry, copy, base_addr)) != KERN_SUCCESS) {
6051 if(next_copy != NULL) {
6052 copy->cpy_hdr.nentries +=
6053 remaining_entries;
6054 copy->cpy_hdr.links.prev->vme_next =
6055 next_copy;
6056 copy->cpy_hdr.links.prev =
6057 previous_prev;
6058 copy->size += copy_size;
6059 }
6060 return kr;
6061 }
6062 }
6063 total_size -= copy_size;
6064 if(total_size == 0)
6065 break;
6066 base_addr += copy_size;
6067 copy_size = 0;
6068 copy->offset = new_offset;
6069 if(next_copy != NULL) {
6070 copy->cpy_hdr.nentries = remaining_entries;
6071 copy->cpy_hdr.links.next = next_copy;
6072 copy->cpy_hdr.links.prev = previous_prev;
6073 next_copy->vme_prev = vm_map_copy_to_entry(copy);
6074 copy->size = total_size;
6075 }
6076 vm_map_lock(dst_map);
6077 while(TRUE) {
6078 if (!vm_map_lookup_entry(dst_map,
6079 base_addr, &tmp_entry)) {
6080 vm_map_unlock(dst_map);
6081 return(KERN_INVALID_ADDRESS);
6082 }
6083 if (tmp_entry->in_transition) {
6084 entry->needs_wakeup = TRUE;
6085 vm_map_entry_wait(dst_map, THREAD_UNINT);
6086 } else {
6087 break;
6088 }
6089 }
6090 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(base_addr));
6091
6092 entry = tmp_entry;
6093 } /* while */
6094
6095 /*
6096 * Throw away the vm_map_copy object
6097 */
6098 if (discard_on_success)
6099 vm_map_copy_discard(copy);
6100
6101 return(KERN_SUCCESS);
6102 }/* vm_map_copy_overwrite_nested */
6103
6104 kern_return_t
6105 vm_map_copy_overwrite(
6106 vm_map_t dst_map,
6107 vm_map_offset_t dst_addr,
6108 vm_map_copy_t copy,
6109 boolean_t interruptible)
6110 {
6111 vm_map_size_t head_size, tail_size;
6112 vm_map_copy_t head_copy, tail_copy;
6113 vm_map_offset_t head_addr, tail_addr;
6114 vm_map_entry_t entry;
6115 kern_return_t kr;
6116
6117 head_size = 0;
6118 tail_size = 0;
6119 head_copy = NULL;
6120 tail_copy = NULL;
6121 head_addr = 0;
6122 tail_addr = 0;
6123
6124 if (interruptible ||
6125 copy == VM_MAP_COPY_NULL ||
6126 copy->type != VM_MAP_COPY_ENTRY_LIST) {
6127 /*
6128 * We can't split the "copy" map if we're interruptible
6129 * or if we don't have a "copy" map...
6130 */
6131 blunt_copy:
6132 return vm_map_copy_overwrite_nested(dst_map,
6133 dst_addr,
6134 copy,
6135 interruptible,
6136 (pmap_t) NULL,
6137 TRUE);
6138 }
6139
6140 if (copy->size < 3 * PAGE_SIZE) {
6141 /*
6142 * Too small to bother with optimizing...
6143 */
6144 goto blunt_copy;
6145 }
6146
6147 if ((dst_addr & PAGE_MASK) != (copy->offset & PAGE_MASK)) {
6148 /*
6149 * Incompatible mis-alignment of source and destination...
6150 */
6151 goto blunt_copy;
6152 }
6153
6154 /*
6155 * Proper alignment or identical mis-alignment at the beginning.
6156 * Let's try and do a small unaligned copy first (if needed)
6157 * and then an aligned copy for the rest.
6158 */
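/*
 * Illustrative numbers (assuming 4K pages): with dst_addr == 0x1800,
 * copy->offset page offset == 0x800 and copy->size == 0x3000, the
 * split below yields head_addr == 0x1800 with head_size == 0x800,
 * a page-aligned middle of 0x2000 bytes starting at dst 0x2000, and
 * tail_addr == 0x4000 with tail_size == 0x800.
 */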
6159 if (!page_aligned(dst_addr)) {
6160 head_addr = dst_addr;
6161 head_size = PAGE_SIZE - (copy->offset & PAGE_MASK);
6162 }
6163 if (!page_aligned(copy->offset + copy->size)) {
6164 /*
6165 * Mis-alignment at the end.
6166 * Do an aligned copy up to the last page and
6167 * then an unaligned copy for the remaining bytes.
6168 */
6169 tail_size = (copy->offset + copy->size) & PAGE_MASK;
6170 tail_addr = dst_addr + copy->size - tail_size;
6171 }
6172
6173 if (head_size + tail_size == copy->size) {
6174 /*
6175 * It's all unaligned, no optimization possible...
6176 */
6177 goto blunt_copy;
6178 }
6179
6180 /*
6181 * Can't optimize if there are any submaps in the
6182 * destination due to the way we free the "copy" map
6183 * progressively in vm_map_copy_overwrite_nested()
6184 * in that case.
6185 */
6186 vm_map_lock_read(dst_map);
6187 if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
6188 vm_map_unlock_read(dst_map);
6189 goto blunt_copy;
6190 }
6191 for (;
6192 (entry != vm_map_copy_to_entry(copy) &&
6193 entry->vme_start < dst_addr + copy->size);
6194 entry = entry->vme_next) {
6195 if (entry->is_sub_map) {
6196 vm_map_unlock_read(dst_map);
6197 goto blunt_copy;
6198 }
6199 }
6200 vm_map_unlock_read(dst_map);
6201
6202 if (head_size) {
6203 /*
6204 * Unaligned copy of the first "head_size" bytes, to reach
6205 * a page boundary.
6206 */
6207
6208 /*
6209 * Extract "head_copy" out of "copy".
6210 */
6211 head_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6212 vm_map_copy_first_entry(head_copy) =
6213 vm_map_copy_to_entry(head_copy);
6214 vm_map_copy_last_entry(head_copy) =
6215 vm_map_copy_to_entry(head_copy);
6216 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
6217 head_copy->cpy_hdr.nentries = 0;
6218 head_copy->cpy_hdr.entries_pageable =
6219 copy->cpy_hdr.entries_pageable;
6220 vm_map_store_init(&head_copy->cpy_hdr);
6221
6222 head_copy->offset = copy->offset;
6223 head_copy->size = head_size;
6224
6225 copy->offset += head_size;
6226 copy->size -= head_size;
6227
6228 entry = vm_map_copy_first_entry(copy);
6229 vm_map_copy_clip_end(copy, entry, copy->offset);
6230 vm_map_copy_entry_unlink(copy, entry);
6231 vm_map_copy_entry_link(head_copy,
6232 vm_map_copy_to_entry(head_copy),
6233 entry);
6234
6235 /*
6236 * Do the unaligned copy.
6237 */
6238 kr = vm_map_copy_overwrite_nested(dst_map,
6239 head_addr,
6240 head_copy,
6241 interruptible,
6242 (pmap_t) NULL,
6243 FALSE);
6244 if (kr != KERN_SUCCESS)
6245 goto done;
6246 }
6247
6248 if (tail_size) {
6249 /*
6250 * Extract "tail_copy" out of "copy".
6251 */
6252 tail_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6253 vm_map_copy_first_entry(tail_copy) =
6254 vm_map_copy_to_entry(tail_copy);
6255 vm_map_copy_last_entry(tail_copy) =
6256 vm_map_copy_to_entry(tail_copy);
6257 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
6258 tail_copy->cpy_hdr.nentries = 0;
6259 tail_copy->cpy_hdr.entries_pageable =
6260 copy->cpy_hdr.entries_pageable;
6261 vm_map_store_init(&tail_copy->cpy_hdr);
6262
6263 tail_copy->offset = copy->offset + copy->size - tail_size;
6264 tail_copy->size = tail_size;
6265
6266 copy->size -= tail_size;
6267
6268 entry = vm_map_copy_last_entry(copy);
6269 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
6270 entry = vm_map_copy_last_entry(copy);
6271 vm_map_copy_entry_unlink(copy, entry);
6272 vm_map_copy_entry_link(tail_copy,
6273 vm_map_copy_last_entry(tail_copy),
6274 entry);
6275 }
6276
6277 /*
6278 * Copy most (or possibly all) of the data.
6279 */
6280 kr = vm_map_copy_overwrite_nested(dst_map,
6281 dst_addr + head_size,
6282 copy,
6283 interruptible,
6284 (pmap_t) NULL,
6285 FALSE);
6286 if (kr != KERN_SUCCESS) {
6287 goto done;
6288 }
6289
6290 if (tail_size) {
6291 kr = vm_map_copy_overwrite_nested(dst_map,
6292 tail_addr,
6293 tail_copy,
6294 interruptible,
6295 (pmap_t) NULL,
6296 FALSE);
6297 }
6298
6299 done:
6300 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
6301 if (kr == KERN_SUCCESS) {
6302 /*
6303 * Discard all the copy maps.
6304 */
6305 if (head_copy) {
6306 vm_map_copy_discard(head_copy);
6307 head_copy = NULL;
6308 }
6309 vm_map_copy_discard(copy);
6310 if (tail_copy) {
6311 vm_map_copy_discard(tail_copy);
6312 tail_copy = NULL;
6313 }
6314 } else {
6315 /*
6316 * Re-assemble the original copy map.
6317 */
6318 if (head_copy) {
6319 entry = vm_map_copy_first_entry(head_copy);
6320 vm_map_copy_entry_unlink(head_copy, entry);
6321 vm_map_copy_entry_link(copy,
6322 vm_map_copy_to_entry(copy),
6323 entry);
6324 copy->offset -= head_size;
6325 copy->size += head_size;
6326 vm_map_copy_discard(head_copy);
6327 head_copy = NULL;
6328 }
6329 if (tail_copy) {
6330 entry = vm_map_copy_last_entry(tail_copy);
6331 vm_map_copy_entry_unlink(tail_copy, entry);
6332 vm_map_copy_entry_link(copy,
6333 vm_map_copy_last_entry(copy),
6334 entry);
6335 copy->size += tail_size;
6336 vm_map_copy_discard(tail_copy);
6337 tail_copy = NULL;
6338 }
6339 }
6340 return kr;
6341 }
6342
6343
6344 /*
6345 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
6346 *
6347 * Description:
6348 * Physically copy unaligned data
6349 *
6350 * Implementation:
6351 * Unaligned parts of pages have to be physically copied. We use
6352 * a modified form of vm_fault_copy (which understands non-aligned
6353 * page offsets and sizes) to do the copy. We attempt to copy as
6354 * much memory in one go as possible; however, vm_fault_copy copies
6355 * within one memory object, so we have to find the smallest of "amount
6356 * left", "source object data size" and "target object data size". With
6357 * unaligned data we don't need to split regions, so the source
6358 * (copy) object should be a single map entry; the target range,
6359 * however, may be split over multiple map entries. In any event we
6360 * are pessimistic about these assumptions.
6361 *
6362 * Assumptions:
6363 * dst_map is locked on entry and is returned locked on success,
6364 * unlocked on error.
6365 */
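/*
 * In other words, each pass of the copy loop below moves
 *	min(dst_size, src_size, amount_left)
 * bytes with vm_fault_copy() and then advances past whichever of
 * the destination entry or source copy entry was exhausted.
 */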
6366
6367 static kern_return_t
6368 vm_map_copy_overwrite_unaligned(
6369 vm_map_t dst_map,
6370 vm_map_entry_t entry,
6371 vm_map_copy_t copy,
6372 vm_map_offset_t start)
6373 {
6374 vm_map_entry_t copy_entry = vm_map_copy_first_entry(copy);
6375 vm_map_version_t version;
6376 vm_object_t dst_object;
6377 vm_object_offset_t dst_offset;
6378 vm_object_offset_t src_offset;
6379 vm_object_offset_t entry_offset;
6380 vm_map_offset_t entry_end;
6381 vm_map_size_t src_size,
6382 dst_size,
6383 copy_size,
6384 amount_left;
6385 kern_return_t kr = KERN_SUCCESS;
6386
6387 vm_map_lock_write_to_read(dst_map);
6388
6389 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
6390 amount_left = copy->size;
6391 /*
6392 * Unaligned, so we never clipped this entry; we need the offset into
6393 * the vm_object, not just into the data.
6394 */
6395 while (amount_left > 0) {
6396
6397 if (entry == vm_map_to_entry(dst_map)) {
6398 vm_map_unlock_read(dst_map);
6399 return KERN_INVALID_ADDRESS;
6400 }
6401
6402 /* "start" must be within the current map entry */
6403 assert ((start>=entry->vme_start) && (start<entry->vme_end));
6404
6405 dst_offset = start - entry->vme_start;
6406
6407 dst_size = entry->vme_end - start;
6408
6409 src_size = copy_entry->vme_end -
6410 (copy_entry->vme_start + src_offset);
6411
6412 if (dst_size < src_size) {
6413 /*
6414 * we can only copy dst_size bytes before
6415 * we have to get the next destination entry
6416 */
6417 copy_size = dst_size;
6418 } else {
6419 /*
6420 * we can only copy src_size bytes before
6421 * we have to get the next source copy entry
6422 */
6423 copy_size = src_size;
6424 }
6425
6426 if (copy_size > amount_left) {
6427 copy_size = amount_left;
6428 }
6429 /*
6430 * Entry needs copy: create a shadow object for the
6431 * copy-on-write region.
6432 */
6433 if (entry->needs_copy &&
6434 ((entry->protection & VM_PROT_WRITE) != 0))
6435 {
6436 if (vm_map_lock_read_to_write(dst_map)) {
6437 vm_map_lock_read(dst_map);
6438 goto RetryLookup;
6439 }
6440 vm_object_shadow(&entry->object.vm_object,
6441 &entry->offset,
6442 (vm_map_size_t)(entry->vme_end
6443 - entry->vme_start));
6444 entry->needs_copy = FALSE;
6445 vm_map_lock_write_to_read(dst_map);
6446 }
6447 dst_object = entry->object.vm_object;
6448 /*
6449 * Unlike with the virtual (aligned) copy, we're going
6450 * to fault on it, therefore we need a target object.
6451 */
6452 if (dst_object == VM_OBJECT_NULL) {
6453 if (vm_map_lock_read_to_write(dst_map)) {
6454 vm_map_lock_read(dst_map);
6455 goto RetryLookup;
6456 }
6457 dst_object = vm_object_allocate((vm_map_size_t)
6458 entry->vme_end - entry->vme_start);
6459 entry->object.vm_object = dst_object;
6460 entry->offset = 0;
6461 vm_map_lock_write_to_read(dst_map);
6462 }
6463 /*
6464 * Take an object reference and unlock map. The "entry" may
6465 * disappear or change when the map is unlocked.
6466 */
6467 vm_object_reference(dst_object);
6468 version.main_timestamp = dst_map->timestamp;
6469 entry_offset = entry->offset;
6470 entry_end = entry->vme_end;
6471 vm_map_unlock_read(dst_map);
6472 /*
6473 * Copy as much as possible in one pass
6474 */
6475 kr = vm_fault_copy(
6476 copy_entry->object.vm_object,
6477 copy_entry->offset + src_offset,
6478 &copy_size,
6479 dst_object,
6480 entry_offset + dst_offset,
6481 dst_map,
6482 &version,
6483 THREAD_UNINT );
6484
6485 start += copy_size;
6486 src_offset += copy_size;
6487 amount_left -= copy_size;
6488 /*
6489 * Release the object reference
6490 */
6491 vm_object_deallocate(dst_object);
6492 /*
6493 * If a hard error occurred, return it now
6494 */
6495 if (kr != KERN_SUCCESS)
6496 return kr;
6497
6498 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
6499 || amount_left == 0)
6500 {
6501 /*
6502 * all done with this copy entry, dispose.
6503 */
6504 vm_map_copy_entry_unlink(copy, copy_entry);
6505 vm_object_deallocate(copy_entry->object.vm_object);
6506 vm_map_copy_entry_dispose(copy, copy_entry);
6507
6508 if ((copy_entry = vm_map_copy_first_entry(copy))
6509 == vm_map_copy_to_entry(copy) && amount_left) {
6510 /*
6511 * not finished copying but ran out of source
6512 */
6513 return KERN_INVALID_ADDRESS;
6514 }
6515 src_offset = 0;
6516 }
6517
6518 if (amount_left == 0)
6519 return KERN_SUCCESS;
6520
6521 vm_map_lock_read(dst_map);
6522 if (version.main_timestamp == dst_map->timestamp) {
6523 if (start == entry_end) {
6524 /*
6525 * destination region is split. Use the version
6526 * information to avoid a lookup in the normal
6527 * case.
6528 */
6529 entry = entry->vme_next;
6530 /*
6531 * should be contiguous. Fail if we encounter
6532 * a hole in the destination.
6533 */
6534 if (start != entry->vme_start) {
6535 vm_map_unlock_read(dst_map);
6536 return KERN_INVALID_ADDRESS ;
6537 }
6538 }
6539 } else {
6540 /*
6541 * Map version check failed.
6542 * we must lookup the entry because somebody
6543 * might have changed the map behind our backs.
6544 */
6545 RetryLookup:
6546 if (!vm_map_lookup_entry(dst_map, start, &entry))
6547 {
6548 vm_map_unlock_read(dst_map);
6549 return KERN_INVALID_ADDRESS ;
6550 }
6551 }
6552 }/* while */
6553
6554 return KERN_SUCCESS;
6555 }/* vm_map_copy_overwrite_unaligned */
6556
6557 /*
6558 * Routine: vm_map_copy_overwrite_aligned [internal use only]
6559 *
6560 * Description:
6561 * Does all the vm_trickery possible for whole pages.
6562 *
6563 * Implementation:
6564 *
6565 * If there are no permanent objects in the destination,
6566 * and the source and destination map entry zones match,
6567 * and the destination map entry is not shared,
6568 * then the map entries can be deleted and replaced
6569 * with those from the copy. The following code is the
6570 * basic idea of what to do, but there are lots of annoying
6571 * little details about getting protection and inheritance
6572 * right. Should add protection, inheritance, and sharing checks
6573 * to the above pass and make sure that no wiring is involved.
6574 */
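/*
 * Outline of the loop below: for each copy entry, if the matching
 * destination entry covers only temporary, unshared memory, the
 * destination's object is simply thrown away and the copy entry's
 * object installed in its place; otherwise ("slow_copy") the data
 * is physically copied with vm_fault_copy().
 */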
6575
6576 int vm_map_copy_overwrite_aligned_src_not_internal = 0;
6577 int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
6578 int vm_map_copy_overwrite_aligned_src_large = 0;
6579
6580 static kern_return_t
6581 vm_map_copy_overwrite_aligned(
6582 vm_map_t dst_map,
6583 vm_map_entry_t tmp_entry,
6584 vm_map_copy_t copy,
6585 vm_map_offset_t start,
6586 __unused pmap_t pmap)
6587 {
6588 vm_object_t object;
6589 vm_map_entry_t copy_entry;
6590 vm_map_size_t copy_size;
6591 vm_map_size_t size;
6592 vm_map_entry_t entry;
6593
6594 while ((copy_entry = vm_map_copy_first_entry(copy))
6595 != vm_map_copy_to_entry(copy))
6596 {
6597 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
6598
6599 entry = tmp_entry;
6600 assert(!entry->use_pmap); /* unnested when clipped earlier */
6601 if (entry == vm_map_to_entry(dst_map)) {
6602 vm_map_unlock(dst_map);
6603 return KERN_INVALID_ADDRESS;
6604 }
6605 size = (entry->vme_end - entry->vme_start);
6606 /*
6607 * Make sure that no holes popped up in the
6608 * address map, and that the protection is
6609 * still valid, in case the map was unlocked
6610 * earlier.
6611 */
6612
6613 if ((entry->vme_start != start) || ((entry->is_sub_map)
6614 && !entry->needs_copy)) {
6615 vm_map_unlock(dst_map);
6616 return(KERN_INVALID_ADDRESS);
6617 }
6618 assert(entry != vm_map_to_entry(dst_map));
6619
6620 /*
6621 * Check protection again
6622 */
6623
6624 if ( ! (entry->protection & VM_PROT_WRITE)) {
6625 vm_map_unlock(dst_map);
6626 return(KERN_PROTECTION_FAILURE);
6627 }
6628
6629 /*
6630 * Adjust to source size first
6631 */
6632
6633 if (copy_size < size) {
6634 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
6635 size = copy_size;
6636 }
6637
6638 /*
6639 * Adjust to destination size
6640 */
6641
6642 if (size < copy_size) {
6643 vm_map_copy_clip_end(copy, copy_entry,
6644 copy_entry->vme_start + size);
6645 copy_size = size;
6646 }
6647
6648 assert((entry->vme_end - entry->vme_start) == size);
6649 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
6650 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
6651
6652 /*
6653 * If the destination contains temporary unshared memory,
6654 * we can perform the copy by throwing it away and
6655 * installing the source data.
6656 */
6657
6658 object = entry->object.vm_object;
6659 if ((!entry->is_shared &&
6660 ((object == VM_OBJECT_NULL) ||
6661 (object->internal && !object->true_share))) ||
6662 entry->needs_copy) {
6663 vm_object_t old_object = entry->object.vm_object;
6664 vm_object_offset_t old_offset = entry->offset;
6665 vm_object_offset_t offset;
6666
6667 /*
6668 * Ensure that the source and destination aren't
6669 * identical
6670 */
6671 if (old_object == copy_entry->object.vm_object &&
6672 old_offset == copy_entry->offset) {
6673 vm_map_copy_entry_unlink(copy, copy_entry);
6674 vm_map_copy_entry_dispose(copy, copy_entry);
6675
6676 if (old_object != VM_OBJECT_NULL)
6677 vm_object_deallocate(old_object);
6678
6679 start = tmp_entry->vme_end;
6680 tmp_entry = tmp_entry->vme_next;
6681 continue;
6682 }
6683
6684 #if !CONFIG_EMBEDDED
6685 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
6686 #define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
6687 if (copy_entry->object.vm_object != VM_OBJECT_NULL &&
6688 copy_entry->object.vm_object->vo_size >= __TRADEOFF1_OBJ_SIZE &&
6689 copy_size <= __TRADEOFF1_COPY_SIZE) {
6690 /*
6691 * Virtual vs. Physical copy tradeoff #1.
6692 *
6693 * Copying only a few pages out of a large
6694 * object: do a physical copy instead of
6695 * a virtual copy, to avoid possibly keeping
6696 * the entire large object alive because of
6697 * those few copy-on-write pages.
6698 */
6699 vm_map_copy_overwrite_aligned_src_large++;
6700 goto slow_copy;
6701 }
6702 #endif /* !CONFIG_EMBEDDED */
6703
6704 if (entry->alias >= VM_MEMORY_MALLOC &&
6705 entry->alias <= VM_MEMORY_MALLOC_LARGE_REUSED) {
6706 vm_object_t new_object, new_shadow;
6707
6708 /*
6709 * We're about to map something over a mapping
6710 * established by malloc()...
6711 */
6712 new_object = copy_entry->object.vm_object;
6713 if (new_object != VM_OBJECT_NULL) {
6714 vm_object_lock_shared(new_object);
6715 }
6716 while (new_object != VM_OBJECT_NULL &&
6717 #if !CONFIG_EMBEDDED
6718 !new_object->true_share &&
6719 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
6720 #endif /* !CONFIG_EMBEDDED */
6721 new_object->internal) {
6722 new_shadow = new_object->shadow;
6723 if (new_shadow == VM_OBJECT_NULL) {
6724 break;
6725 }
6726 vm_object_lock_shared(new_shadow);
6727 vm_object_unlock(new_object);
6728 new_object = new_shadow;
6729 }
6730 if (new_object != VM_OBJECT_NULL) {
6731 if (!new_object->internal) {
6732 /*
6733 * The new mapping is backed
6734 * by an external object. We
6735 * don't want malloc'ed memory
6736 * to be replaced with such a
6737 * non-anonymous mapping, so
6738 * let's go off the optimized
6739 * path...
6740 */
6741 vm_map_copy_overwrite_aligned_src_not_internal++;
6742 vm_object_unlock(new_object);
6743 goto slow_copy;
6744 }
6745 #if !CONFIG_EMBEDDED
6746 if (new_object->true_share ||
6747 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
6748 /*
6749 * Same if there's a "true_share"
6750 * object in the shadow chain, or
6751 * an object with a non-default
6752 * (SYMMETRIC) copy strategy.
6753 */
6754 vm_map_copy_overwrite_aligned_src_not_symmetric++;
6755 vm_object_unlock(new_object);
6756 goto slow_copy;
6757 }
6758 #endif /* !CONFIG_EMBEDDED */
6759 vm_object_unlock(new_object);
6760 }
6761 /*
6762 * The new mapping is still backed by
6763 * anonymous (internal) memory, so it's
6764 * OK to substitute it for the original
6765 * malloc() mapping.
6766 */
6767 }
6768
6769 if (old_object != VM_OBJECT_NULL) {
6770 if(entry->is_sub_map) {
6771 if(entry->use_pmap) {
6772 #ifndef NO_NESTED_PMAP
6773 pmap_unnest(dst_map->pmap,
6774 (addr64_t)entry->vme_start,
6775 entry->vme_end - entry->vme_start);
6776 #endif /* NO_NESTED_PMAP */
6777 if(dst_map->mapped) {
6778 /* clean up parent */
6779 /* map/maps */
6780 vm_map_submap_pmap_clean(
6781 dst_map, entry->vme_start,
6782 entry->vme_end,
6783 entry->object.sub_map,
6784 entry->offset);
6785 }
6786 } else {
6787 vm_map_submap_pmap_clean(
6788 dst_map, entry->vme_start,
6789 entry->vme_end,
6790 entry->object.sub_map,
6791 entry->offset);
6792 }
6793 vm_map_deallocate(
6794 entry->object.sub_map);
6795 } else {
6796 if(dst_map->mapped) {
6797 vm_object_pmap_protect(
6798 entry->object.vm_object,
6799 entry->offset,
6800 entry->vme_end
6801 - entry->vme_start,
6802 PMAP_NULL,
6803 entry->vme_start,
6804 VM_PROT_NONE);
6805 } else {
6806 pmap_remove(dst_map->pmap,
6807 (addr64_t)(entry->vme_start),
6808 (addr64_t)(entry->vme_end));
6809 }
6810 vm_object_deallocate(old_object);
6811 }
6812 }
6813
6814 entry->is_sub_map = FALSE;
6815 entry->object = copy_entry->object;
6816 object = entry->object.vm_object;
6817 entry->needs_copy = copy_entry->needs_copy;
6818 entry->wired_count = 0;
6819 entry->user_wired_count = 0;
6820 offset = entry->offset = copy_entry->offset;
6821
6822 vm_map_copy_entry_unlink(copy, copy_entry);
6823 vm_map_copy_entry_dispose(copy, copy_entry);
6824
6825 /*
6826 * We could try to push pages into the pmap at this point, BUT
6827 * this optimization only saved on average 2 us per page if ALL
6828 * the pages in the source were currently mapped
6829 * and ALL the pages in the dest were touched. If fewer than 2/3
6830 * of the pages were touched, this optimization actually cost more cycles;
6831 * it also puts a lot of pressure on the pmap layer w/r/t mapping structures.
6832 */
6833
6834 /*
6835 * Set up for the next iteration. The map
6836 * has not been unlocked, so the next
6837 * address should be at the end of this
6838 * entry, and the next map entry should be
6839 * the one following it.
6840 */
6841
6842 start = tmp_entry->vme_end;
6843 tmp_entry = tmp_entry->vme_next;
6844 } else {
6845 vm_map_version_t version;
6846 vm_object_t dst_object;
6847 vm_object_offset_t dst_offset;
6848 kern_return_t r;
6849
6850 slow_copy:
6851 if (entry->needs_copy) {
6852 vm_object_shadow(&entry->object.vm_object,
6853 &entry->offset,
6854 (entry->vme_end -
6855 entry->vme_start));
6856 entry->needs_copy = FALSE;
6857 }
6858
6859 dst_object = entry->object.vm_object;
6860 dst_offset = entry->offset;
6861
6862 /*
6863 * Take an object reference, and record
6864 * the map version information so that the
6865 * map can be safely unlocked.
6866 */
6867
6868 if (dst_object == VM_OBJECT_NULL) {
6869 /*
6870 * We would usually have just taken the
6871 * optimized path above if the destination
6872 * object has not been allocated yet. But we
6873 * now disable that optimization if the copy
6874 * entry's object is not backed by anonymous
6875 * memory to avoid replacing malloc'ed
6876 * (i.e. re-usable) anonymous memory with a
6877 * not-so-anonymous mapping.
6878 * So we have to handle this case here and
6879 * allocate a new VM object for this map entry.
6880 */
6881 dst_object = vm_object_allocate(
6882 entry->vme_end - entry->vme_start);
6883 dst_offset = 0;
6884 entry->object.vm_object = dst_object;
6885 entry->offset = dst_offset;
6886
6887 }
6888
6889 vm_object_reference(dst_object);
6890
6891 /* account for unlock bumping up timestamp */
6892 version.main_timestamp = dst_map->timestamp + 1;
6893
6894 vm_map_unlock(dst_map);
6895
6896 /*
6897 * Copy as much as possible in one pass
6898 */
6899
6900 copy_size = size;
6901 r = vm_fault_copy(
6902 copy_entry->object.vm_object,
6903 copy_entry->offset,
6904 &copy_size,
6905 dst_object,
6906 dst_offset,
6907 dst_map,
6908 &version,
6909 THREAD_UNINT );
6910
6911 /*
6912 * Release the object reference
6913 */
6914
6915 vm_object_deallocate(dst_object);
6916
6917 /*
6918 * If a hard error occurred, return it now
6919 */
6920
6921 if (r != KERN_SUCCESS)
6922 return(r);
6923
6924 if (copy_size != 0) {
6925 /*
6926 * Dispose of the copied region
6927 */
6928
6929 vm_map_copy_clip_end(copy, copy_entry,
6930 copy_entry->vme_start + copy_size);
6931 vm_map_copy_entry_unlink(copy, copy_entry);
6932 vm_object_deallocate(copy_entry->object.vm_object);
6933 vm_map_copy_entry_dispose(copy, copy_entry);
6934 }
6935
6936 /*
6937 * Pick up in the destination map where we left off.
6938 *
6939 * Use the version information to avoid a lookup
6940 * in the normal case.
6941 */
6942
6943 start += copy_size;
6944 vm_map_lock(dst_map);
6945 if (version.main_timestamp == dst_map->timestamp &&
6946 copy_size != 0) {
6947 /* We can safely use saved tmp_entry value */
6948
6949 vm_map_clip_end(dst_map, tmp_entry, start);
6950 tmp_entry = tmp_entry->vme_next;
6951 } else {
6952 /* Must do lookup of tmp_entry */
6953
6954 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
6955 vm_map_unlock(dst_map);
6956 return(KERN_INVALID_ADDRESS);
6957 }
6958 vm_map_clip_start(dst_map, tmp_entry, start);
6959 }
6960 }
6961 }/* while */
6962
6963 return(KERN_SUCCESS);
6964 }/* vm_map_copy_overwrite_aligned */
6965
6966 /*
6967 * Routine: vm_map_copyin_kernel_buffer [internal use only]
6968 *
6969 * Description:
6970 * Copy in data to a kernel buffer from space in the
6971 * source map. The original space may be optionally
6972 * deallocated.
6973 *
6974 * If successful, returns a new copy object.
6975 */
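/*
 * Layout of the resulting copy object (a single kalloc'ed block):
 *
 *	+------------------------+------------------------+
 *	| struct vm_map_copy     | len bytes of data      |
 *	+------------------------+------------------------+
 *	^ copy                   ^ copy->cpy_kdata == (void *)(copy + 1)
 *
 * so copy->cpy_kalloc_size == sizeof (struct vm_map_copy) + len.
 */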
6976 static kern_return_t
6977 vm_map_copyin_kernel_buffer(
6978 vm_map_t src_map,
6979 vm_map_offset_t src_addr,
6980 vm_map_size_t len,
6981 boolean_t src_destroy,
6982 vm_map_copy_t *copy_result)
6983 {
6984 kern_return_t kr;
6985 vm_map_copy_t copy;
6986 vm_size_t kalloc_size;
6987
6988 if ((vm_size_t) len != len) {
6989 /* "len" is too big and doesn't fit in a "vm_size_t" */
6990 return KERN_RESOURCE_SHORTAGE;
6991 }
6992 kalloc_size = (vm_size_t) (sizeof(struct vm_map_copy) + len);
6993 assert((vm_map_size_t) kalloc_size == sizeof (struct vm_map_copy) + len);
6994
6995 copy = (vm_map_copy_t) kalloc(kalloc_size);
6996 if (copy == VM_MAP_COPY_NULL) {
6997 return KERN_RESOURCE_SHORTAGE;
6998 }
6999 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
7000 copy->size = len;
7001 copy->offset = 0;
7002 copy->cpy_kdata = (void *) (copy + 1);
7003 copy->cpy_kalloc_size = kalloc_size;
7004
7005 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t) len);
7006 if (kr != KERN_SUCCESS) {
7007 kfree(copy, kalloc_size);
7008 return kr;
7009 }
7010 if (src_destroy) {
7011 (void) vm_map_remove(src_map, vm_map_trunc_page(src_addr),
7012 vm_map_round_page(src_addr + len),
7013 VM_MAP_REMOVE_INTERRUPTIBLE |
7014 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
7015 (src_map == kernel_map) ?
7016 VM_MAP_REMOVE_KUNWIRE : 0);
7017 }
7018 *copy_result = copy;
7019 return KERN_SUCCESS;
7020 }
7021
7022 /*
7023 * Routine: vm_map_copyout_kernel_buffer [internal use only]
7024 *
7025 * Description:
7026 * Copy out data from a kernel buffer into space in the
7027 * destination map. The space may be optionally dynamically
7028 * allocated.
7029 *
7030 * If successful, consumes the copy object.
7031 * Otherwise, the caller is responsible for it.
7032 */
7033 static int vm_map_copyout_kernel_buffer_failures = 0;
7034 static kern_return_t
7035 vm_map_copyout_kernel_buffer(
7036 vm_map_t map,
7037 vm_map_address_t *addr, /* IN/OUT */
7038 vm_map_copy_t copy,
7039 boolean_t overwrite)
7040 {
7041 kern_return_t kr = KERN_SUCCESS;
7042 thread_t thread = current_thread();
7043
7044 if (!overwrite) {
7045
7046 /*
7047 * Allocate space in the target map for the data
7048 */
7049 *addr = 0;
7050 kr = vm_map_enter(map,
7051 addr,
7052 vm_map_round_page(copy->size),
7053 (vm_map_offset_t) 0,
7054 VM_FLAGS_ANYWHERE,
7055 VM_OBJECT_NULL,
7056 (vm_object_offset_t) 0,
7057 FALSE,
7058 VM_PROT_DEFAULT,
7059 VM_PROT_ALL,
7060 VM_INHERIT_DEFAULT);
7061 if (kr != KERN_SUCCESS)
7062 return kr;
7063 }
7064
7065 /*
7066 * Copyout the data from the kernel buffer to the target map.
7067 */
7068 if (thread->map == map) {
7069
7070 /*
7071 * If the target map is the current map, just do
7072 * the copy.
7073 */
7074 assert((vm_size_t) copy->size == copy->size);
7075 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
7076 kr = KERN_INVALID_ADDRESS;
7077 }
7078 }
7079 else {
7080 vm_map_t oldmap;
7081
7082 /*
7083 * If the target map is another map, assume the
7084 * target's address space identity for the duration
7085 * of the copy.
7086 */
7087 vm_map_reference(map);
7088 oldmap = vm_map_switch(map);
7089
7090 assert((vm_size_t) copy->size == copy->size);
7091 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
7092 vm_map_copyout_kernel_buffer_failures++;
7093 kr = KERN_INVALID_ADDRESS;
7094 }
7095
7096 (void) vm_map_switch(oldmap);
7097 vm_map_deallocate(map);
7098 }
7099
7100 if (kr != KERN_SUCCESS) {
7101 /* the copy failed, clean up */
7102 if (!overwrite) {
7103 /*
7104 * Deallocate the space we allocated in the target map.
7105 */
7106 (void) vm_map_remove(map,
7107 vm_map_trunc_page(*addr),
7108 vm_map_round_page(*addr +
7109 vm_map_round_page(copy->size)),
7110 VM_MAP_NO_FLAGS);
7111 *addr = 0;
7112 }
7113 } else {
7114 /* copy was successful, discard the copy structure */
7115 kfree(copy, copy->cpy_kalloc_size);
7116 }
7117
7118 return kr;
7119 }
7120
7121 /*
7122 * Macro: vm_map_copy_insert
7123 *
7124 * Description:
7125 * Link a copy chain ("copy") into a map at the
7126 * specified location (after "where").
7127 * Side effects:
7128 * The copy chain is destroyed.
7129 * Warning:
7130 * The arguments are evaluated multiple times.
7131 */
7132 #define vm_map_copy_insert(map, where, copy) \
7133 MACRO_BEGIN \
7134 vm_map_store_copy_insert(map, where, copy); \
7135 zfree(vm_map_copy_zone, copy); \
7136 MACRO_END
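/*
 * (vm_map_copyout() below uses this to splice the whole adjusted
 * entry chain into the destination map after "last"; note that the
 * vm_map_copy header itself is freed as a side effect.)
 */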
7137
7138 /*
7139 * Routine: vm_map_copyout
7140 *
7141 * Description:
7142 * Copy out a copy chain ("copy") into newly-allocated
7143 * space in the destination map.
7144 *
7145 * If successful, consumes the copy object.
7146 * Otherwise, the caller is responsible for it.
7147 */
7148 kern_return_t
7149 vm_map_copyout(
7150 vm_map_t dst_map,
7151 vm_map_address_t *dst_addr, /* OUT */
7152 vm_map_copy_t copy)
7153 {
7154 vm_map_size_t size;
7155 vm_map_size_t adjustment;
7156 vm_map_offset_t start;
7157 vm_object_offset_t vm_copy_start;
7158 vm_map_entry_t last;
7159 register
7160 vm_map_entry_t entry;
7161
7162 /*
7163 * Check for null copy object.
7164 */
7165
7166 if (copy == VM_MAP_COPY_NULL) {
7167 *dst_addr = 0;
7168 return(KERN_SUCCESS);
7169 }
7170
7171 /*
7172 * Check for special copy object, created
7173 * by vm_map_copyin_object.
7174 */
7175
7176 if (copy->type == VM_MAP_COPY_OBJECT) {
7177 vm_object_t object = copy->cpy_object;
7178 kern_return_t kr;
7179 vm_object_offset_t offset;
7180
7181 offset = vm_object_trunc_page(copy->offset);
7182 size = vm_map_round_page(copy->size +
7183 (vm_map_size_t)(copy->offset - offset));
7184 *dst_addr = 0;
7185 kr = vm_map_enter(dst_map, dst_addr, size,
7186 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
7187 object, offset, FALSE,
7188 VM_PROT_DEFAULT, VM_PROT_ALL,
7189 VM_INHERIT_DEFAULT);
7190 if (kr != KERN_SUCCESS)
7191 return(kr);
7192 /* Account for non-pagealigned copy object */
7193 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
7194 zfree(vm_map_copy_zone, copy);
7195 return(KERN_SUCCESS);
7196 }
7197
7198 /*
7199 * Check for special kernel buffer allocated
7200 * by new_ipc_kmsg_copyin.
7201 */
7202
7203 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
7204 return(vm_map_copyout_kernel_buffer(dst_map, dst_addr,
7205 copy, FALSE));
7206 }
7207
7208 /*
7209 * Find space for the data
7210 */
7211
7212 vm_copy_start = vm_object_trunc_page(copy->offset);
7213 size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size)
7214 - vm_copy_start;
7215
7216 StartAgain: ;
7217
7218 vm_map_lock(dst_map);
7219 if( dst_map->disable_vmentry_reuse == TRUE) {
7220 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
7221 last = entry;
7222 } else {
7223 assert(first_free_is_valid(dst_map));
7224 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
7225 vm_map_min(dst_map) : last->vme_end;
7226 }
7227
7228 while (TRUE) {
7229 vm_map_entry_t next = last->vme_next;
7230 vm_map_offset_t end = start + size;
7231
7232 if ((end > dst_map->max_offset) || (end < start)) {
7233 if (dst_map->wait_for_space) {
7234 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
7235 assert_wait((event_t) dst_map,
7236 THREAD_INTERRUPTIBLE);
7237 vm_map_unlock(dst_map);
7238 thread_block(THREAD_CONTINUE_NULL);
7239 goto StartAgain;
7240 }
7241 }
7242 vm_map_unlock(dst_map);
7243 return(KERN_NO_SPACE);
7244 }
7245
7246 if ((next == vm_map_to_entry(dst_map)) ||
7247 (next->vme_start >= end))
7248 break;
7249
7250 last = next;
7251 start = last->vme_end;
7252 }
7253
7254 /*
7255 * Since we're going to just drop the map
7256 * entries from the copy into the destination
7257 * map, they must come from the same pool.
7258 */
7259
7260 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
7261 /*
7262 * Mismatches occur when dealing with the default
7263 * pager.
7264 */
7265 zone_t old_zone;
7266 vm_map_entry_t next, new;
7267
7268 /*
7269 * Find the zone that the copies were allocated from
7270 */
7271
7272 entry = vm_map_copy_first_entry(copy);
7273
7274 /*
7275 * Reinitialize the copy so that vm_map_copy_entry_link
7276 * will work.
7277 */
7278 vm_map_store_copy_reset(copy, entry);
7279 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
7280
7281 /*
7282 * Copy each entry.
7283 */
7284 while (entry != vm_map_copy_to_entry(copy)) {
7285 new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
7286 vm_map_entry_copy_full(new, entry);
7287 new->use_pmap = FALSE; /* clr address space specifics */
7288 vm_map_copy_entry_link(copy,
7289 vm_map_copy_last_entry(copy),
7290 new);
7291 next = entry->vme_next;
7292 old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
7293 zfree(old_zone, entry);
7294 entry = next;
7295 }
7296 }
7297
7298 /*
7299 * Adjust the addresses in the copy chain, and
7300 * reset the region attributes.
7301 */
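/*
 * Illustrative numbers: if copy->offset is 0x2300, vm_copy_start is
 * its page-truncation 0x2000 and the space found starts at 0x9000,
 * then adjustment == 0x7000, every entry shifts by that amount, and
 * the caller eventually gets *dst_addr == 0x9300, preserving the
 * original page offset.
 */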
7302
7303 adjustment = start - vm_copy_start;
7304 for (entry = vm_map_copy_first_entry(copy);
7305 entry != vm_map_copy_to_entry(copy);
7306 entry = entry->vme_next) {
7307 entry->vme_start += adjustment;
7308 entry->vme_end += adjustment;
7309
7310 entry->inheritance = VM_INHERIT_DEFAULT;
7311 entry->protection = VM_PROT_DEFAULT;
7312 entry->max_protection = VM_PROT_ALL;
7313 entry->behavior = VM_BEHAVIOR_DEFAULT;
7314
7315 /*
7316 * If the entry is now wired,
7317 * map the pages into the destination map.
7318 */
7319 if (entry->wired_count != 0) {
7320 register vm_map_offset_t va;
7321 vm_object_offset_t offset;
7322 register vm_object_t object;
7323 vm_prot_t prot;
7324 int type_of_fault;
7325
7326 object = entry->object.vm_object;
7327 offset = entry->offset;
7328 va = entry->vme_start;
7329
7330 pmap_pageable(dst_map->pmap,
7331 entry->vme_start,
7332 entry->vme_end,
7333 TRUE);
7334
7335 while (va < entry->vme_end) {
7336 register vm_page_t m;
7337
7338 /*
7339 * Look up the page in the object.
7340 * Assert that the page will be found in the
7341 * top object:
7342 * either
7343 * the object was newly created by
7344 * vm_object_copy_slowly, and has
7345 * copies of all of the pages from
7346 * the source object
7347 * or
7348 * the object was moved from the old
7349 * map entry; because the old map
7350 * entry was wired, all of the pages
7351 * were in the top-level object.
7352 * (XXX not true if we wire pages for
7353 * reading)
7354 */
7355 vm_object_lock(object);
7356
7357 m = vm_page_lookup(object, offset);
7358 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
7359 m->absent)
7360 panic("vm_map_copyout: wiring %p", m);
7361
7362 /*
7363 * ENCRYPTED SWAP:
7364 * The page is assumed to be wired here, so it
7365 * shouldn't be encrypted. Otherwise, we
7366 * couldn't enter it in the page table, since
7367 * we don't want the user to see the encrypted
7368 * data.
7369 */
7370 ASSERT_PAGE_DECRYPTED(m);
7371
7372 prot = entry->protection;
7373
7374 if (override_nx(dst_map, entry->alias) && prot)
7375 prot |= VM_PROT_EXECUTE;
7376
7377 type_of_fault = DBG_CACHE_HIT_FAULT;
7378
7379 vm_fault_enter(m, dst_map->pmap, va, prot, prot,
7380 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
7381 &type_of_fault);
7382
7383 vm_object_unlock(object);
7384
7385 offset += PAGE_SIZE_64;
7386 va += PAGE_SIZE;
7387 }
7388 }
7389 }
7390
7391 /*
7392 * Correct the page alignment for the result
7393 */
7394
7395 *dst_addr = start + (copy->offset - vm_copy_start);
7396
7397 /*
7398 * Update the hints and the map size
7399 */
7400
7401 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
7402
7403 dst_map->size += size;
7404
7405 /*
7406 * Link in the copy
7407 */
7408
7409 vm_map_copy_insert(dst_map, last, copy);
7410
7411 vm_map_unlock(dst_map);
7412
7413 /*
7414 * XXX If wiring_required, call vm_map_pageable
7415 */
7416
7417 return(KERN_SUCCESS);
7418 }
7419
7420 /*
7421 * Routine: vm_map_copyin
7422 *
7423 * Description:
7424 * see vm_map_copyin_common. Exported via Unsupported.exports.
7425 *
7426 */
7427
7428 #undef vm_map_copyin
7429
7430 kern_return_t
7431 vm_map_copyin(
7432 vm_map_t src_map,
7433 vm_map_address_t src_addr,
7434 vm_map_size_t len,
7435 boolean_t src_destroy,
7436 vm_map_copy_t *copy_result) /* OUT */
7437 {
7438 return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
7439 FALSE, copy_result, FALSE));
7440 }
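/*
 * Typical pairing of the copyin/copyout primitives (illustrative
 * sketch only; "src_task_map", "dst_task_map", "addr" and "len" are
 * hypothetical):
 *
 *	vm_map_copy_t copy;
 *	vm_map_address_t dst_addr;
 *	kern_return_t kr;
 *
 *	kr = vm_map_copyin(src_task_map, addr, len, FALSE, &copy);
 *	if (kr == KERN_SUCCESS) {
 *		kr = vm_map_copyout(dst_task_map, &dst_addr, copy);
 *		if (kr != KERN_SUCCESS)
 *			vm_map_copy_discard(copy);
 *	}
 *
 * On success vm_map_copyout() consumes "copy"; on failure the
 * caller must discard it, as above.
 */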
7441
7442 /*
7443 * Routine: vm_map_copyin_common
7444 *
7445 * Description:
7446 * Copy the specified region (src_addr, len) from the
7447 * source address space (src_map), possibly removing
7448 * the region from the source address space (src_destroy).
7449 *
7450 * Returns:
7451 * A vm_map_copy_t object (copy_result), suitable for
7452 * insertion into another address space (using vm_map_copyout),
7453 * copying over another address space region (using
7454 * vm_map_copy_overwrite). If the copy is unused, it
7455 * should be destroyed (using vm_map_copy_discard).
7456 *
7457 * In/out conditions:
7458 * The source map should not be locked on entry.
7459 */
7460
7461 typedef struct submap_map {
7462 vm_map_t parent_map;
7463 vm_map_offset_t base_start;
7464 vm_map_offset_t base_end;
7465 vm_map_size_t base_len;
7466 struct submap_map *next;
7467 } submap_map_t;
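/*
 * vm_map_copyin_common() pushes one submap_map_t per submap level it
 * descends into ("parent_maps" acts as a stack), so that once a
 * submap range has been fully copied it can pop back out and resume
 * in the parent map where it left off.
 */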
7468
7469 kern_return_t
7470 vm_map_copyin_common(
7471 vm_map_t src_map,
7472 vm_map_address_t src_addr,
7473 vm_map_size_t len,
7474 boolean_t src_destroy,
7475 __unused boolean_t src_volatile,
7476 vm_map_copy_t *copy_result, /* OUT */
7477 boolean_t use_maxprot)
7478 {
7479 vm_map_entry_t tmp_entry; /* Result of last map lookup --
7480 * in multi-level lookup, this
7481 * entry contains the actual
7482 * vm_object/offset.
7483 */
7484 register
7485 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
7486
7487 vm_map_offset_t src_start; /* Start of current entry --
7488 * where copy is taking place now
7489 */
7490 vm_map_offset_t src_end; /* End of entire region to be
7491 * copied */
7492 vm_map_offset_t src_base;
7493 vm_map_t base_map = src_map;
7494 boolean_t map_share=FALSE;
7495 submap_map_t *parent_maps = NULL;
7496
7497 register
7498 vm_map_copy_t copy; /* Resulting copy */
7499 vm_map_address_t copy_addr;
7500
7501 /*
7502 * Check for copies of zero bytes.
7503 */
7504
7505 if (len == 0) {
7506 *copy_result = VM_MAP_COPY_NULL;
7507 return(KERN_SUCCESS);
7508 }
7509
7510 /*
7511 * Check that the end address doesn't overflow
7512 */
7513 src_end = src_addr + len;
7514 if (src_end < src_addr)
7515 return KERN_INVALID_ADDRESS;
7516
7517 /*
7518 * If the copy is sufficiently small, use a kernel buffer instead
7519 * of making a virtual copy. The theory being that the cost of
7520 * setting up VM (and taking C-O-W faults) dominates the copy costs
7521 * for small regions.
7522 */
7523 if ((len < msg_ool_size_small) && !use_maxprot)
7524 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
7525 src_destroy, copy_result);
7526
7527 /*
7528 * Compute (page aligned) start and end of region
7529 */
7530 src_start = vm_map_trunc_page(src_addr);
7531 src_end = vm_map_round_page(src_end);
7532
7533 XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);
7534
7535 /*
7536 * Allocate a header element for the list.
7537 *
7538 * Use the start and end in the header to
7539 * remember the endpoints prior to rounding.
7540 */
7541
7542 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7543 vm_map_copy_first_entry(copy) =
7544 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
7545 copy->type = VM_MAP_COPY_ENTRY_LIST;
7546 copy->cpy_hdr.nentries = 0;
7547 copy->cpy_hdr.entries_pageable = TRUE;
7548
7549 vm_map_store_init( &(copy->cpy_hdr) );
7550
7551 copy->offset = src_addr;
7552 copy->size = len;
7553
7554 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
7555
7556 #define RETURN(x) \
7557 MACRO_BEGIN \
7558 vm_map_unlock(src_map); \
7559 if(src_map != base_map) \
7560 vm_map_deallocate(src_map); \
7561 if (new_entry != VM_MAP_ENTRY_NULL) \
7562 vm_map_copy_entry_dispose(copy,new_entry); \
7563 vm_map_copy_discard(copy); \
7564 { \
7565 submap_map_t *_ptr; \
7566 \
7567 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
7568 parent_maps=parent_maps->next; \
7569 if (_ptr->parent_map != base_map) \
7570 vm_map_deallocate(_ptr->parent_map); \
7571 kfree(_ptr, sizeof(submap_map_t)); \
7572 } \
7573 } \
7574 MACRO_RETURN(x); \
7575 MACRO_END
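/*
 * RETURN(x) unwinds everything built up so far: it unlocks (and, for
 * submaps, deallocates) the current source map, disposes of any
 * pending new_entry, discards the partial copy and frees the
 * parent-map stack before returning x.
 */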
7576
7577 /*
7578 * Find the beginning of the region.
7579 */
7580
7581 vm_map_lock(src_map);
7582
7583 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry))
7584 RETURN(KERN_INVALID_ADDRESS);
7585 if(!tmp_entry->is_sub_map) {
7586 vm_map_clip_start(src_map, tmp_entry, src_start);
7587 }
7588 /* set for later submap fix-up */
7589 copy_addr = src_start;
7590
7591 /*
7592 * Go through entries until we get to the end.
7593 */
7594
7595 while (TRUE) {
7596 register
7597 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
7598 vm_map_size_t src_size; /* Size of source
7599 * map entry (in both
7600 * maps)
7601 */
7602
7603 register
7604 vm_object_t src_object; /* Object to copy */
7605 vm_object_offset_t src_offset;
7606
7607 boolean_t src_needs_copy; /* Should source map
7608 * be made read-only
7609 * for copy-on-write?
7610 */
7611
7612 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
7613
7614 boolean_t was_wired; /* Was source wired? */
7615 vm_map_version_t version; /* Version before locks
7616 * dropped to make copy
7617 */
7618 kern_return_t result; /* Return value from
7619 * copy_strategically.
7620 */
7621 while(tmp_entry->is_sub_map) {
7622 vm_map_size_t submap_len;
7623 submap_map_t *ptr;
7624
7625 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
7626 ptr->next = parent_maps;
7627 parent_maps = ptr;
7628 ptr->parent_map = src_map;
7629 ptr->base_start = src_start;
7630 ptr->base_end = src_end;
7631 submap_len = tmp_entry->vme_end - src_start;
7632 if(submap_len > (src_end-src_start))
7633 submap_len = src_end-src_start;
7634 ptr->base_len = submap_len;
7635
7636 src_start -= tmp_entry->vme_start;
7637 src_start += tmp_entry->offset;
7638 src_end = src_start + submap_len;
7639 src_map = tmp_entry->object.sub_map;
7640 vm_map_lock(src_map);
7641 /* keep an outstanding reference for all maps in */
7642 /* the parents tree except the base map */
7643 vm_map_reference(src_map);
7644 vm_map_unlock(ptr->parent_map);
7645 if (!vm_map_lookup_entry(
7646 src_map, src_start, &tmp_entry))
7647 RETURN(KERN_INVALID_ADDRESS);
7648 map_share = TRUE;
7649 if(!tmp_entry->is_sub_map)
7650 vm_map_clip_start(src_map, tmp_entry, src_start);
7651 src_entry = tmp_entry;
7652 }
7653 /* we are now in the lowest level submap... */
7654
7655 if ((tmp_entry->object.vm_object != VM_OBJECT_NULL) &&
7656 (tmp_entry->object.vm_object->phys_contiguous)) {
7657 /* This is not supported for now. In future */
7658 /* we will need to detect the phys_contig */
7659 /* condition and then upgrade copy_slowly */
7660 /* to do a physical copy from the device- */
7661 /* memory-based object. We can piggy-back */
7662 /* off of the was_wired boolean to set up */
7663 /* the proper handling. */
7664 RETURN(KERN_PROTECTION_FAILURE);
7665 }
7666 /*
7667 * Create a new address map entry to hold the result.
7668 * Fill in the fields from the appropriate source entries.
7669 * We must unlock the source map to do this if we need
7670 * to allocate a map entry.
7671 */
7672 if (new_entry == VM_MAP_ENTRY_NULL) {
7673 version.main_timestamp = src_map->timestamp;
7674 vm_map_unlock(src_map);
7675
7676 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
7677
7678 vm_map_lock(src_map);
7679 if ((version.main_timestamp + 1) != src_map->timestamp) {
7680 if (!vm_map_lookup_entry(src_map, src_start,
7681 &tmp_entry)) {
7682 RETURN(KERN_INVALID_ADDRESS);
7683 }
7684 if (!tmp_entry->is_sub_map)
7685 vm_map_clip_start(src_map, tmp_entry, src_start);
7686 continue; /* restart w/ new tmp_entry */
7687 }
7688 }
7689
7690 /*
7691 * Verify that the region can be read.
7692 */
7693 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
7694 !use_maxprot) ||
7695 (src_entry->max_protection & VM_PROT_READ) == 0)
7696 RETURN(KERN_PROTECTION_FAILURE);
7697
7698 /*
7699 * Clip against the endpoints of the entire region.
7700 */
7701
7702 vm_map_clip_end(src_map, src_entry, src_end);
7703
7704 src_size = src_entry->vme_end - src_start;
7705 src_object = src_entry->object.vm_object;
7706 src_offset = src_entry->offset;
7707 was_wired = (src_entry->wired_count != 0);
7708
7709 vm_map_entry_copy(new_entry, src_entry);
7710 new_entry->use_pmap = FALSE; /* clr address space specifics */
7711
7712 /*
7713 * Attempt non-blocking copy-on-write optimizations.
7714 */
7715
7716 if (src_destroy &&
7717 (src_object == VM_OBJECT_NULL ||
7718 (src_object->internal && !src_object->true_share
7719 && !map_share))) {
7720 /*
7721 * If we are destroying the source, and the object
7722 * is internal, we can move the object reference
7723 * from the source to the copy. The copy is
7724 * copy-on-write only if the source is.
7725 * We make another reference to the object, because
7726 * destroying the source entry will deallocate it.
7727 */
7728 vm_object_reference(src_object);
7729
7730 /*
7731 * Copy is always unwired; vm_map_entry_copy()
7732 * set its wired count to zero.
7733 */
7734
7735 goto CopySuccessful;
7736 }
7737
7738
7739 RestartCopy:
7740 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
7741 src_object, new_entry, new_entry->object.vm_object,
7742 was_wired, 0);
7743 if ((src_object == VM_OBJECT_NULL ||
7744 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
7745 vm_object_copy_quickly(
7746 &new_entry->object.vm_object,
7747 src_offset,
7748 src_size,
7749 &src_needs_copy,
7750 &new_entry_needs_copy)) {
7751
7752 new_entry->needs_copy = new_entry_needs_copy;
7753
7754 /*
7755 * Handle copy-on-write obligations
7756 */
7757
7758 if (src_needs_copy && !tmp_entry->needs_copy) {
7759 vm_prot_t prot;
7760
7761 prot = src_entry->protection & ~VM_PROT_WRITE;
7762
7763 if (override_nx(src_map, src_entry->alias) && prot)
7764 prot |= VM_PROT_EXECUTE;
7765
7766 vm_object_pmap_protect(
7767 src_object,
7768 src_offset,
7769 src_size,
7770 (src_entry->is_shared ?
7771 PMAP_NULL
7772 : src_map->pmap),
7773 src_entry->vme_start,
7774 prot);
7775
7776 tmp_entry->needs_copy = TRUE;
7777 }
7778
7779 /*
7780 * The map has never been unlocked, so it's safe
7781 * to move to the next entry rather than doing
7782 * another lookup.
7783 */
7784
7785 goto CopySuccessful;
7786 }
7787
7788 /*
7789 * Take an object reference, so that we may
7790 * release the map lock(s).
7791 */
7792
7793 assert(src_object != VM_OBJECT_NULL);
7794 vm_object_reference(src_object);
7795
7796 /*
7797 * Record the timestamp for later verification.
7798 * Unlock the map.
7799 */
7800
7801 version.main_timestamp = src_map->timestamp;
7802 vm_map_unlock(src_map); /* Increments timestamp once! */
7803
7804 /*
7805 * Perform the copy
7806 */
7807
7808 if (was_wired) {
7809 CopySlowly:
7810 vm_object_lock(src_object);
7811 result = vm_object_copy_slowly(
7812 src_object,
7813 src_offset,
7814 src_size,
7815 THREAD_UNINT,
7816 &new_entry->object.vm_object);
7817 new_entry->offset = 0;
7818 new_entry->needs_copy = FALSE;
7819
7820 }
7821 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
7822 (tmp_entry->is_shared || map_share)) {
7823 vm_object_t new_object;
7824
7825 vm_object_lock_shared(src_object);
7826 new_object = vm_object_copy_delayed(
7827 src_object,
7828 src_offset,
7829 src_size,
7830 TRUE);
7831 if (new_object == VM_OBJECT_NULL)
7832 goto CopySlowly;
7833
7834 new_entry->object.vm_object = new_object;
7835 new_entry->needs_copy = TRUE;
7836 result = KERN_SUCCESS;
7837
7838 } else {
7839 result = vm_object_copy_strategically(src_object,
7840 src_offset,
7841 src_size,
7842 &new_entry->object.vm_object,
7843 &new_entry->offset,
7844 &new_entry_needs_copy);
7845
7846 new_entry->needs_copy = new_entry_needs_copy;
7847 }
7848
7849 if (result != KERN_SUCCESS &&
7850 result != KERN_MEMORY_RESTART_COPY) {
7851 vm_map_lock(src_map);
7852 RETURN(result);
7853 }
7854
7855 /*
7856 * Throw away the extra reference
7857 */
7858
7859 vm_object_deallocate(src_object);
7860
7861 /*
7862 * Verify that the map has not substantially
7863 * changed while the copy was being made.
7864 */
7865
7866 vm_map_lock(src_map);
7867
7868 if ((version.main_timestamp + 1) == src_map->timestamp)
7869 goto VerificationSuccessful;
7870
7871 /*
7872 * Simple version comparison failed.
7873 *
7874 * Retry the lookup and verify that the
7875 * same object/offset are still present.
7876 *
7877 * [Note: a memory manager that colludes with
7878 * the calling task can detect that we have
7879 * cheated. While the map was unlocked, the
7880 * mapping could have been changed and restored.]
7881 */
7882
7883 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
7884 RETURN(KERN_INVALID_ADDRESS);
7885 }
7886
7887 src_entry = tmp_entry;
7888 vm_map_clip_start(src_map, src_entry, src_start);
7889
7890 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
7891 !use_maxprot) ||
7892 ((src_entry->max_protection & VM_PROT_READ) == 0))
7893 goto VerificationFailed;
7894
7895 if (src_entry->vme_end < new_entry->vme_end)
7896 src_size = (new_entry->vme_end = src_entry->vme_end) - src_start;
7897
7898 if ((src_entry->object.vm_object != src_object) ||
7899 (src_entry->offset != src_offset) ) {
7900
7901 /*
7902 * Verification failed.
7903 *
7904 * Start over with this top-level entry.
7905 */
7906
7907 VerificationFailed: ;
7908
7909 vm_object_deallocate(new_entry->object.vm_object);
7910 tmp_entry = src_entry;
7911 continue;
7912 }
7913
7914 /*
7915 * Verification succeeded.
7916 */
7917
7918 VerificationSuccessful: ;
7919
7920 if (result == KERN_MEMORY_RESTART_COPY)
7921 goto RestartCopy;
7922
7923 /*
7924 * Copy succeeded.
7925 */
7926
7927 CopySuccessful: ;
7928
7929 /*
7930 * Link in the new copy entry.
7931 */
7932
7933 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
7934 new_entry);
7935
7936 /*
7937 * Determine whether the entire region
7938 * has been copied.
7939 */
7940 src_base = src_start;
7941 src_start = new_entry->vme_end;
7942 new_entry = VM_MAP_ENTRY_NULL;
7943 while ((src_start >= src_end) && (src_end != 0)) {
7944 if (src_map != base_map) {
7945 submap_map_t *ptr;
7946
7947 ptr = parent_maps;
7948 assert(ptr != NULL);
7949 parent_maps = parent_maps->next;
7950
7951 /* fix up the damage we did in that submap */
7952 vm_map_simplify_range(src_map,
7953 src_base,
7954 src_end);
7955
7956 vm_map_unlock(src_map);
7957 vm_map_deallocate(src_map);
7958 vm_map_lock(ptr->parent_map);
7959 src_map = ptr->parent_map;
7960 src_base = ptr->base_start;
7961 src_start = ptr->base_start + ptr->base_len;
7962 src_end = ptr->base_end;
7963 if ((src_end > src_start) &&
7964 !vm_map_lookup_entry(
7965 src_map, src_start, &tmp_entry))
7966 RETURN(KERN_INVALID_ADDRESS);
7967 kfree(ptr, sizeof(submap_map_t));
7968 if(parent_maps == NULL)
7969 map_share = FALSE;
7970 src_entry = tmp_entry->vme_prev;
7971 } else
7972 break;
7973 }
7974 if ((src_start >= src_end) && (src_end != 0))
7975 break;
7976
7977 /*
7978 * Verify that there are no gaps in the region
7979 */
7980
7981 tmp_entry = src_entry->vme_next;
7982 if ((tmp_entry->vme_start != src_start) ||
7983 (tmp_entry == vm_map_to_entry(src_map)))
7984 RETURN(KERN_INVALID_ADDRESS);
7985 }
7986
7987 /*
7988 * If the source should be destroyed, do it now, since the
7989 * copy was successful.
7990 */
7991 if (src_destroy) {
7992 (void) vm_map_delete(src_map,
7993 vm_map_trunc_page(src_addr),
7994 src_end,
7995 (src_map == kernel_map) ?
7996 VM_MAP_REMOVE_KUNWIRE :
7997 VM_MAP_NO_FLAGS,
7998 VM_MAP_NULL);
7999 } else {
8000 /* fix up the damage we did in the base map */
8001 vm_map_simplify_range(src_map,
8002 vm_map_trunc_page(src_addr),
8003 vm_map_round_page(src_end));
8004 }
8005
8006 vm_map_unlock(src_map);
8007
8008 /* Fix-up start and end points in copy. This is necessary */
8009 /* when the various entries in the copy object were picked */
8010 /* up from different sub-maps */
8011
8012 tmp_entry = vm_map_copy_first_entry(copy);
8013 while (tmp_entry != vm_map_copy_to_entry(copy)) {
8014 tmp_entry->vme_end = copy_addr +
8015 (tmp_entry->vme_end - tmp_entry->vme_start);
8016 tmp_entry->vme_start = copy_addr;
8017 assert(tmp_entry->vme_start < tmp_entry->vme_end);
8018 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
8019 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
8020 }
8021
8022 *copy_result = copy;
8023 return(KERN_SUCCESS);
8024
8025 #undef RETURN
8026 }
8027
8028 /*
8029 * vm_map_copyin_object:
8030 *
8031 * Create a copy object from an object.
8032 * Our caller donates an object reference.
8033 */
8034
8035 kern_return_t
8036 vm_map_copyin_object(
8037 vm_object_t object,
8038 vm_object_offset_t offset, /* offset of region in object */
8039 vm_object_size_t size, /* size of region in object */
8040 vm_map_copy_t *copy_result) /* OUT */
8041 {
8042 vm_map_copy_t copy; /* Resulting copy */
8043
8044 /*
8045 * We drop the object into a special copy object
8046 * that contains the object directly.
8047 */
8048
8049 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8050 copy->type = VM_MAP_COPY_OBJECT;
8051 copy->cpy_object = object;
8052 copy->offset = offset;
8053 copy->size = size;
8054
8055 *copy_result = copy;
8056 return(KERN_SUCCESS);
8057 }
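
/*
 * Illustrative sketch, not part of xnu: how a caller that already holds
 * a vm_object_t reference might wrap it with vm_map_copyin_object() and
 * then hand the resulting copy object to vm_map_copyout().  The function
 * name example_donate_object and its error handling are hypothetical;
 * vm_map_copyin_object() consumes the donated object reference.
 */
#if 0 /* example only -- not compiled */
static kern_return_t
example_donate_object(
	vm_map_t		dst_map,
	vm_object_t		object,		/* caller donates this reference */
	vm_object_offset_t	offset,
	vm_object_size_t	size,
	vm_map_address_t	*dst_addr)	/* OUT */
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	/* wrap the object in a VM_MAP_COPY_OBJECT copy object */
	kr = vm_map_copyin_object(object, offset, size, &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	/* map the copy into the destination map at a kernel-chosen address */
	kr = vm_map_copyout(dst_map, dst_addr, copy);
	if (kr != KERN_SUCCESS) {
		/* the copy object still owns the object reference */
		vm_map_copy_discard(copy);
	}
	return kr;
}
#endif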
8058
8059 static void
8060 vm_map_fork_share(
8061 vm_map_t old_map,
8062 vm_map_entry_t old_entry,
8063 vm_map_t new_map)
8064 {
8065 vm_object_t object;
8066 vm_map_entry_t new_entry;
8067
8068 /*
8069 * New sharing code. New map entry
8070 * references original object. Internal
8071 * objects use an asynchronous copy algorithm for
8072 * future copies. First make sure we have
8073 * the right object. If we need a shadow,
8074 * or someone else already has one, then
8075 * make a new shadow and share it.
8076 */
8077
8078 object = old_entry->object.vm_object;
8079 if (old_entry->is_sub_map) {
8080 assert(old_entry->wired_count == 0);
8081 #ifndef NO_NESTED_PMAP
8082 if(old_entry->use_pmap) {
8083 kern_return_t result;
8084
8085 result = pmap_nest(new_map->pmap,
8086 (old_entry->object.sub_map)->pmap,
8087 (addr64_t)old_entry->vme_start,
8088 (addr64_t)old_entry->vme_start,
8089 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
8090 if(result)
8091 panic("vm_map_fork_share: pmap_nest failed!");
8092 }
8093 #endif /* NO_NESTED_PMAP */
8094 } else if (object == VM_OBJECT_NULL) {
8095 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
8096 old_entry->vme_start));
8097 old_entry->offset = 0;
8098 old_entry->object.vm_object = object;
8099 assert(!old_entry->needs_copy);
8100 } else if (object->copy_strategy !=
8101 MEMORY_OBJECT_COPY_SYMMETRIC) {
8102
8103 /*
8104 * We are already using an asymmetric
8105 * copy, and therefore we already have
8106 * the right object.
8107 */
8108
8109 assert(! old_entry->needs_copy);
8110 }
8111 else if (old_entry->needs_copy || /* case 1 */
8112 object->shadowed || /* case 2 */
8113 (!object->true_share && /* case 3 */
8114 !old_entry->is_shared &&
8115 (object->vo_size >
8116 (vm_map_size_t)(old_entry->vme_end -
8117 old_entry->vme_start)))) {
8118
8119 /*
8120 * We need to create a shadow.
8121 * There are three cases here.
8122 * In the first case, we need to
8123 * complete a deferred symmetrical
8124 * copy that we participated in.
8125 * In the second and third cases,
8126 * we need to create the shadow so
8127 * that changes that we make to the
8128 * object do not interfere with
8129 * any symmetrical copies which
8130 * have occurred (case 2) or which
8131 * might occur (case 3).
8132 *
8133 * The first case is when we had
8134 * deferred shadow object creation
8135 * via the entry->needs_copy mechanism.
8136 * This mechanism only works when
8137 * only one entry points to the source
8138 * object, and we are about to create
8139 * a second entry pointing to the
8140 * same object. The problem is that
8141 * there is no way of mapping from
8142 * an object to the entries pointing
8143 * to it. (Deferred shadow creation
8144 * works with one entry because it occurs
8145 * at fault time, and we walk from the
8146 * entry to the object when handling
8147 * the fault.)
8148 *
8149 * The second case is when the object
8150 * to be shared has already been copied
8151 * with a symmetric copy, but we point
8152 * directly to the object without
8153 * needs_copy set in our entry. (This
8154 * can happen because different ranges
8155 * of an object can be pointed to by
8156 * different entries. In particular,
8157 * a single entry pointing to an object
8158 * can be split by a call to vm_inherit,
8159 * which, combined with task_create, can
8160 * result in the different entries
8161 * having different needs_copy values.)
8162 * The shadowed flag in the object allows
8163 * us to detect this case. The problem
8164 * with this case is that if this object
8165 * has or will have shadows, then we
8166 * must not perform an asymmetric copy
8167 * of this object, since such a copy
8168 * allows the object to be changed, which
8169 * will break the previous symmetrical
8170 * copies (which rely upon the object
8171 * not changing). In a sense, the shadowed
8172 * flag says "don't change this object".
8173 * We fix this by creating a shadow
8174 * object for this object, and sharing
8175 * that. This works because we are free
8176 * to change the shadow object (and thus
8177 * to use an asymmetric copy strategy);
8178 * this is also semantically correct,
8179 * since this object is temporary, and
8180 * therefore a copy of the object is
8181 * as good as the object itself. (This
8182 * is not true for permanent objects,
8183 * since the pager needs to see changes,
8184 * which won't happen if the changes
8185 * are made to a copy.)
8186 *
8187 * The third case is when the object
8188 * to be shared has parts sticking
8189 * outside of the entry we're working
8190 * with, and thus may in the future
8191 * be subject to a symmetrical copy.
8192 * (This is a preemptive version of
8193 * case 2.)
8194 */
8195 vm_object_shadow(&old_entry->object.vm_object,
8196 &old_entry->offset,
8197 (vm_map_size_t) (old_entry->vme_end -
8198 old_entry->vme_start));
8199
8200 /*
8201 * If we're making a shadow for other than
8202 * copy on write reasons, then we have
8203 * to remove write permission.
8204 */
8205
8206 if (!old_entry->needs_copy &&
8207 (old_entry->protection & VM_PROT_WRITE)) {
8208 vm_prot_t prot;
8209
8210 prot = old_entry->protection & ~VM_PROT_WRITE;
8211
8212 if (override_nx(old_map, old_entry->alias) && prot)
8213 prot |= VM_PROT_EXECUTE;
8214
8215 if (old_map->mapped) {
8216 vm_object_pmap_protect(
8217 old_entry->object.vm_object,
8218 old_entry->offset,
8219 (old_entry->vme_end -
8220 old_entry->vme_start),
8221 PMAP_NULL,
8222 old_entry->vme_start,
8223 prot);
8224 } else {
8225 pmap_protect(old_map->pmap,
8226 old_entry->vme_start,
8227 old_entry->vme_end,
8228 prot);
8229 }
8230 }
8231
8232 old_entry->needs_copy = FALSE;
8233 object = old_entry->object.vm_object;
8234 }
8235
8236
8237 /*
8238 * If object was using a symmetric copy strategy,
8239 * change its copy strategy to the default
8240 * asymmetric copy strategy, which is copy_delay
8241 * in the non-norma case and copy_call in the
8242 * norma case. Bump the reference count for the
8243 * new entry.
8244 */
8245
8246 if(old_entry->is_sub_map) {
8247 vm_map_lock(old_entry->object.sub_map);
8248 vm_map_reference(old_entry->object.sub_map);
8249 vm_map_unlock(old_entry->object.sub_map);
8250 } else {
8251 vm_object_lock(object);
8252 vm_object_reference_locked(object);
8253 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
8254 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
8255 }
8256 vm_object_unlock(object);
8257 }
8258
8259 /*
8260 * Clone the entry, using object ref from above.
8261 * Mark both entries as shared.
8262 */
8263
8264 new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
8265 * map or descendants */
8266 vm_map_entry_copy(new_entry, old_entry);
8267 old_entry->is_shared = TRUE;
8268 new_entry->is_shared = TRUE;
8269
8270 /*
8271 * Insert the entry into the new map -- we
8272 * know we're inserting at the end of the new
8273 * map.
8274 */
8275
8276 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
8277
8278 /*
8279 * Update the physical map
8280 */
8281
8282 if (old_entry->is_sub_map) {
8283 /* Bill Angell pmap support goes here */
8284 } else {
8285 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
8286 old_entry->vme_end - old_entry->vme_start,
8287 old_entry->vme_start);
8288 }
8289 }
8290
8291 static boolean_t
8292 vm_map_fork_copy(
8293 vm_map_t old_map,
8294 vm_map_entry_t *old_entry_p,
8295 vm_map_t new_map)
8296 {
8297 vm_map_entry_t old_entry = *old_entry_p;
8298 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
8299 vm_map_offset_t start = old_entry->vme_start;
8300 vm_map_copy_t copy;
8301 vm_map_entry_t last = vm_map_last_entry(new_map);
8302
8303 vm_map_unlock(old_map);
8304 /*
8305 * Use maxprot version of copyin because we
8306 * care about whether this memory can ever
8307 * be accessed, not just whether it's accessible
8308 * right now.
8309 */
8310 if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, &copy)
8311 != KERN_SUCCESS) {
8312 /*
8313 * The map might have changed while it
8314 * was unlocked, check it again. Skip
8315 * any blank space or permanently
8316 * unreadable region.
8317 */
8318 vm_map_lock(old_map);
8319 if (!vm_map_lookup_entry(old_map, start, &last) ||
8320 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
8321 last = last->vme_next;
8322 }
8323 *old_entry_p = last;
8324
8325 /*
8326 * XXX For some error returns, want to
8327 * XXX skip to the next element. Note
8328 * that INVALID_ADDRESS and
8329 * PROTECTION_FAILURE are handled above.
8330 */
8331
8332 return FALSE;
8333 }
8334
8335 /*
8336 * Insert the copy into the new map
8337 */
8338
8339 vm_map_copy_insert(new_map, last, copy);
8340
8341 /*
8342 * Pick up the traversal at the end of
8343 * the copied region.
8344 */
8345
8346 vm_map_lock(old_map);
8347 start += entry_size;
8348 if (! vm_map_lookup_entry(old_map, start, &last)) {
8349 last = last->vme_next;
8350 } else {
8351 if (last->vme_start == start) {
8352 /*
8353 * No need to clip here and we don't
8354 * want to cause any unnecessary
8355 * unnesting...
8356 */
8357 } else {
8358 vm_map_clip_start(old_map, last, start);
8359 }
8360 }
8361 *old_entry_p = last;
8362
8363 return TRUE;
8364 }
8365
8366 /*
8367 * vm_map_fork:
8368 *
8369 * Create and return a new map based on the old
8370 * map, according to the inheritance values on the
8371 * regions in that map.
8372 *
8373 * The source map must not be locked.
8374 */
8375 vm_map_t
8376 vm_map_fork(
8377 vm_map_t old_map)
8378 {
8379 pmap_t new_pmap;
8380 vm_map_t new_map;
8381 vm_map_entry_t old_entry;
8382 vm_map_size_t new_size = 0, entry_size;
8383 vm_map_entry_t new_entry;
8384 boolean_t src_needs_copy;
8385 boolean_t new_entry_needs_copy;
8386
8387 new_pmap = pmap_create((vm_map_size_t) 0,
8388 #if defined(__i386__) || defined(__x86_64__)
8389 old_map->pmap->pm_task_map != TASK_MAP_32BIT
8390 #else
8391 0
8392 #endif
8393 );
8394 #if defined(__i386__)
8395 if (old_map->pmap->pm_task_map == TASK_MAP_64BIT_SHARED)
8396 pmap_set_4GB_pagezero(new_pmap);
8397 #endif
8398
8399 vm_map_reference_swap(old_map);
8400 vm_map_lock(old_map);
8401
8402 new_map = vm_map_create(new_pmap,
8403 old_map->min_offset,
8404 old_map->max_offset,
8405 old_map->hdr.entries_pageable);
8406 for (
8407 old_entry = vm_map_first_entry(old_map);
8408 old_entry != vm_map_to_entry(old_map);
8409 ) {
8410
8411 entry_size = old_entry->vme_end - old_entry->vme_start;
8412
8413 switch (old_entry->inheritance) {
8414 case VM_INHERIT_NONE:
8415 break;
8416
8417 case VM_INHERIT_SHARE:
8418 vm_map_fork_share(old_map, old_entry, new_map);
8419 new_size += entry_size;
8420 break;
8421
8422 case VM_INHERIT_COPY:
8423
8424 /*
8425 * Inline the copy_quickly case;
8426 * upon failure, fall back on call
8427 * to vm_map_fork_copy.
8428 */
8429
8430 if(old_entry->is_sub_map)
8431 break;
8432 if ((old_entry->wired_count != 0) ||
8433 ((old_entry->object.vm_object != NULL) &&
8434 (old_entry->object.vm_object->true_share))) {
8435 goto slow_vm_map_fork_copy;
8436 }
8437
8438 new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
8439 vm_map_entry_copy(new_entry, old_entry);
8440 /* clear address space specifics */
8441 new_entry->use_pmap = FALSE;
8442
8443 if (! vm_object_copy_quickly(
8444 &new_entry->object.vm_object,
8445 old_entry->offset,
8446 (old_entry->vme_end -
8447 old_entry->vme_start),
8448 &src_needs_copy,
8449 &new_entry_needs_copy)) {
8450 vm_map_entry_dispose(new_map, new_entry);
8451 goto slow_vm_map_fork_copy;
8452 }
8453
8454 /*
8455 * Handle copy-on-write obligations
8456 */
8457
8458 if (src_needs_copy && !old_entry->needs_copy) {
8459 vm_prot_t prot;
8460
8461 prot = old_entry->protection & ~VM_PROT_WRITE;
8462
8463 if (override_nx(old_map, old_entry->alias) && prot)
8464 prot |= VM_PROT_EXECUTE;
8465
8466 vm_object_pmap_protect(
8467 old_entry->object.vm_object,
8468 old_entry->offset,
8469 (old_entry->vme_end -
8470 old_entry->vme_start),
8471 ((old_entry->is_shared
8472 || old_map->mapped)
8473 ? PMAP_NULL :
8474 old_map->pmap),
8475 old_entry->vme_start,
8476 prot);
8477
8478 old_entry->needs_copy = TRUE;
8479 }
8480 new_entry->needs_copy = new_entry_needs_copy;
8481
8482 /*
8483 * Insert the entry at the end
8484 * of the map.
8485 */
8486
8487 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map),
8488 new_entry);
8489 new_size += entry_size;
8490 break;
8491
8492 slow_vm_map_fork_copy:
8493 if (vm_map_fork_copy(old_map, &old_entry, new_map)) {
8494 new_size += entry_size;
8495 }
8496 continue;
8497 }
8498 old_entry = old_entry->vme_next;
8499 }
8500
8501 new_map->size = new_size;
8502 vm_map_unlock(old_map);
8503 vm_map_deallocate(old_map);
8504
8505 return(new_map);
8506 }
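
/*
 * Illustrative user-space sketch, not part of xnu: the inheritance values
 * consumed by vm_map_fork() above are normally set with mach_vm_inherit().
 * After fork(), a VM_INHERIT_SHARE region stays shared with the child
 * (vm_map_fork_share()), a VM_INHERIT_COPY region is copied copy-on-write,
 * and a VM_INHERIT_NONE region is absent from the child's map.  The
 * function name and the one-page size are hypothetical.
 */
#if 0 /* example only -- user-space code, not compiled */
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <unistd.h>

static void
example_inheritance(void)
{
	mach_vm_address_t	addr = 0;
	mach_vm_size_t		size = 4096;	/* one page, for illustration */

	if (mach_vm_allocate(mach_task_self(), &addr, size,
			     VM_FLAGS_ANYWHERE) != KERN_SUCCESS)
		return;

	/* ask vm_map_fork() to share this region with future children */
	(void) mach_vm_inherit(mach_task_self(), addr, size, VM_INHERIT_SHARE);

	if (fork() == 0) {
		/* child: writes to [addr, addr+size) are seen by the parent;
		 * with VM_INHERIT_COPY (the default) they would not be */
		_exit(0);
	}
}
#endif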
8507
8508 /*
8509 * vm_map_exec:
8510 *
8511 * Set up the "new_map" with the proper execution environment according
8512 * to the type of executable (platform, 64bit, chroot environment).
8513 * Map the comm page and shared region, etc...
8514 */
8515 kern_return_t
8516 vm_map_exec(
8517 vm_map_t new_map,
8518 task_t task,
8519 void *fsroot,
8520 cpu_type_t cpu)
8521 {
8522 SHARED_REGION_TRACE_DEBUG(
8523 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
8524 current_task(), new_map, task, fsroot, cpu));
8525 (void) vm_commpage_enter(new_map, task);
8526 (void) vm_shared_region_enter(new_map, task, fsroot, cpu);
8527 SHARED_REGION_TRACE_DEBUG(
8528 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
8529 current_task(), new_map, task, fsroot, cpu));
8530 return KERN_SUCCESS;
8531 }
8532
8533 /*
8534 * vm_map_lookup_locked:
8535 *
8536 * Finds the VM object, offset, and
8537 * protection for a given virtual address in the
8538 * specified map, assuming a page fault of the
8539 * type specified.
8540 *
8541 * Returns the (object, offset, protection) for
8542 * this address, whether it is wired down, and whether
8543 * this map has the only reference to the data in question.
8544 * In order to later verify this lookup, a "version"
8545 * is returned.
8546 *
8547 * The map MUST be locked by the caller and WILL be
8548 * locked on exit. In order to guarantee the
8549 * existence of the returned object, it is returned
8550 * locked.
8551 *
8552 * If a lookup is requested with "write protection"
8553 * specified, the map may be changed to perform virtual
8554 * copying operations, although the data referenced will
8555 * remain the same.
8556 */
8557 kern_return_t
8558 vm_map_lookup_locked(
8559 vm_map_t *var_map, /* IN/OUT */
8560 vm_map_offset_t vaddr,
8561 vm_prot_t fault_type,
8562 int object_lock_type,
8563 vm_map_version_t *out_version, /* OUT */
8564 vm_object_t *object, /* OUT */
8565 vm_object_offset_t *offset, /* OUT */
8566 vm_prot_t *out_prot, /* OUT */
8567 boolean_t *wired, /* OUT */
8568 vm_object_fault_info_t fault_info, /* OUT */
8569 vm_map_t *real_map)
8570 {
8571 vm_map_entry_t entry;
8572 register vm_map_t map = *var_map;
8573 vm_map_t old_map = *var_map;
8574 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
8575 vm_map_offset_t cow_parent_vaddr = 0;
8576 vm_map_offset_t old_start = 0;
8577 vm_map_offset_t old_end = 0;
8578 register vm_prot_t prot;
8579 boolean_t mask_protections;
8580 vm_prot_t original_fault_type;
8581
8582 /*
8583 * VM_PROT_IS_MASK means that the caller wants us to use "fault_type"
8584 * as a mask against the mapping's actual protections, not as an
8585 * absolute value.
8586 */
8587 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
8588 fault_type &= ~VM_PROT_IS_MASK;
8589 original_fault_type = fault_type;
8590
8591 *real_map = map;
8592
8593 RetryLookup:
8594 fault_type = original_fault_type;
8595
8596 /*
8597 * If the map has an interesting hint, try it before calling
8598 * full blown lookup routine.
8599 */
8600 entry = map->hint;
8601
8602 if ((entry == vm_map_to_entry(map)) ||
8603 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
8604 vm_map_entry_t tmp_entry;
8605
8606 /*
8607 * Entry was either not a valid hint, or the vaddr
8608 * was not contained in the entry, so do a full lookup.
8609 */
8610 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
8611 if((cow_sub_map_parent) && (cow_sub_map_parent != map))
8612 vm_map_unlock(cow_sub_map_parent);
8613 if((*real_map != map)
8614 && (*real_map != cow_sub_map_parent))
8615 vm_map_unlock(*real_map);
8616 return KERN_INVALID_ADDRESS;
8617 }
8618
8619 entry = tmp_entry;
8620 }
8621 if(map == old_map) {
8622 old_start = entry->vme_start;
8623 old_end = entry->vme_end;
8624 }
8625
8626 /*
8627 * Handle submaps. Drop lock on upper map, submap is
8628 * returned locked.
8629 */
8630
8631 submap_recurse:
8632 if (entry->is_sub_map) {
8633 vm_map_offset_t local_vaddr;
8634 vm_map_offset_t end_delta;
8635 vm_map_offset_t start_delta;
8636 vm_map_entry_t submap_entry;
8637 boolean_t mapped_needs_copy=FALSE;
8638
8639 local_vaddr = vaddr;
8640
8641 if ((entry->use_pmap && !(fault_type & VM_PROT_WRITE))) {
8642 /* if real_map equals map we unlock below */
8643 if ((*real_map != map) &&
8644 (*real_map != cow_sub_map_parent))
8645 vm_map_unlock(*real_map);
8646 *real_map = entry->object.sub_map;
8647 }
8648
8649 if(entry->needs_copy && (fault_type & VM_PROT_WRITE)) {
8650 if (!mapped_needs_copy) {
8651 if (vm_map_lock_read_to_write(map)) {
8652 vm_map_lock_read(map);
8653 /* XXX FBDP: entry still valid ? */
8654 if(*real_map == entry->object.sub_map)
8655 *real_map = map;
8656 goto RetryLookup;
8657 }
8658 vm_map_lock_read(entry->object.sub_map);
8659 cow_sub_map_parent = map;
8660 /* reset base to map before cow object */
8661 /* this is the map which will accept */
8662 /* the new cow object */
8663 old_start = entry->vme_start;
8664 old_end = entry->vme_end;
8665 cow_parent_vaddr = vaddr;
8666 mapped_needs_copy = TRUE;
8667 } else {
8668 vm_map_lock_read(entry->object.sub_map);
8669 if((cow_sub_map_parent != map) &&
8670 (*real_map != map))
8671 vm_map_unlock(map);
8672 }
8673 } else {
8674 vm_map_lock_read(entry->object.sub_map);
8675 /* leave the map locked if it is a target */
8676 /* cow sub_map (above); otherwise, just */
8677 /* follow the maps down to the object. */
8678 /* Here we unlock, knowing we are not */
8679 /* revisiting the map. */
8680 if((*real_map != map) && (map != cow_sub_map_parent))
8681 vm_map_unlock_read(map);
8682 }
8683
8684 /* XXX FBDP: map has been unlocked, what protects "entry" !? */
8685 *var_map = map = entry->object.sub_map;
8686
8687 /* calculate the offset in the submap for vaddr */
8688 local_vaddr = (local_vaddr - entry->vme_start) + entry->offset;
8689
8690 RetrySubMap:
8691 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
8692 if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
8693 vm_map_unlock(cow_sub_map_parent);
8694 }
8695 if((*real_map != map)
8696 && (*real_map != cow_sub_map_parent)) {
8697 vm_map_unlock(*real_map);
8698 }
8699 *real_map = map;
8700 return KERN_INVALID_ADDRESS;
8701 }
8702
8703 /* find the attenuated shadow of the underlying object */
8704 /* on our target map */
8705
8706 /* In English: the submap object may extend beyond the */
8707 /* region mapped by the entry, or may only fill a portion */
8708 /* of it. For our purposes, we only care if the object */
8709 /* doesn't fill it. In this case the area which will */
8710 /* ultimately be clipped in the top map will only need */
8711 /* to be as big as the portion of the underlying entry */
8712 /* which is mapped */
8713 start_delta = submap_entry->vme_start > entry->offset ?
8714 submap_entry->vme_start - entry->offset : 0;
8715
8716 end_delta =
8717 (entry->offset + start_delta + (old_end - old_start)) <=
8718 submap_entry->vme_end ?
8719 0 : (entry->offset +
8720 (old_end - old_start))
8721 - submap_entry->vme_end;
8722
8723 old_start += start_delta;
8724 old_end -= end_delta;
8725
8726 if(submap_entry->is_sub_map) {
8727 entry = submap_entry;
8728 vaddr = local_vaddr;
8729 goto submap_recurse;
8730 }
8731
8732 if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) {
8733
8734 vm_object_t sub_object, copy_object;
8735 vm_object_offset_t copy_offset;
8736 vm_map_offset_t local_start;
8737 vm_map_offset_t local_end;
8738 boolean_t copied_slowly = FALSE;
8739
8740 if (vm_map_lock_read_to_write(map)) {
8741 vm_map_lock_read(map);
8742 old_start -= start_delta;
8743 old_end += end_delta;
8744 goto RetrySubMap;
8745 }
8746
8747
8748 sub_object = submap_entry->object.vm_object;
8749 if (sub_object == VM_OBJECT_NULL) {
8750 sub_object =
8751 vm_object_allocate(
8752 (vm_map_size_t)
8753 (submap_entry->vme_end -
8754 submap_entry->vme_start));
8755 submap_entry->object.vm_object = sub_object;
8756 submap_entry->offset = 0;
8757 }
8758 local_start = local_vaddr -
8759 (cow_parent_vaddr - old_start);
8760 local_end = local_vaddr +
8761 (old_end - cow_parent_vaddr);
8762 vm_map_clip_start(map, submap_entry, local_start);
8763 vm_map_clip_end(map, submap_entry, local_end);
8764 /* unnesting was done in vm_map_clip_start/end() */
8765 assert(!submap_entry->use_pmap);
8766
8767 /* This is the COW case, lets connect */
8768 /* an entry in our space to the underlying */
8769 /* object in the submap, bypassing the */
8770 /* submap. */
8771
8772
8773 if(submap_entry->wired_count != 0 ||
8774 (sub_object->copy_strategy ==
8775 MEMORY_OBJECT_COPY_NONE)) {
8776 vm_object_lock(sub_object);
8777 vm_object_copy_slowly(sub_object,
8778 submap_entry->offset,
8779 (submap_entry->vme_end -
8780 submap_entry->vme_start),
8781 FALSE,
8782 &copy_object);
8783 copied_slowly = TRUE;
8784 } else {
8785
8786 /* set up shadow object */
8787 copy_object = sub_object;
8788 vm_object_reference(copy_object);
8789 sub_object->shadowed = TRUE;
8790 submap_entry->needs_copy = TRUE;
8791
8792 prot = submap_entry->protection & ~VM_PROT_WRITE;
8793
8794 if (override_nx(map, submap_entry->alias) && prot)
8795 prot |= VM_PROT_EXECUTE;
8796
8797 vm_object_pmap_protect(
8798 sub_object,
8799 submap_entry->offset,
8800 submap_entry->vme_end -
8801 submap_entry->vme_start,
8802 (submap_entry->is_shared
8803 || map->mapped) ?
8804 PMAP_NULL : map->pmap,
8805 submap_entry->vme_start,
8806 prot);
8807 }
8808
8809 /*
8810 * Adjust the fault offset to the submap entry.
8811 */
8812 copy_offset = (local_vaddr -
8813 submap_entry->vme_start +
8814 submap_entry->offset);
8815
8816 /* This works differently from the */
8817 /* normal submap case. We go back */
8818 /* to the parent of the cow map and */
8819 /* clip out the target portion of */
8820 /* the sub_map, substituting the */
8821 /* new copy object. */
8822
8823 vm_map_unlock(map);
8824 local_start = old_start;
8825 local_end = old_end;
8826 map = cow_sub_map_parent;
8827 *var_map = cow_sub_map_parent;
8828 vaddr = cow_parent_vaddr;
8829 cow_sub_map_parent = NULL;
8830
8831 if(!vm_map_lookup_entry(map,
8832 vaddr, &entry)) {
8833 vm_object_deallocate(
8834 copy_object);
8835 vm_map_lock_write_to_read(map);
8836 return KERN_INVALID_ADDRESS;
8837 }
8838
8839 /* clip out the portion of space */
8840 /* mapped by the sub map which */
8841 /* corresponds to the underlying */
8842 /* object */
8843
8844 /*
8845 * Clip (and unnest) the smallest nested chunk
8846 * possible around the faulting address...
8847 */
8848 local_start = vaddr & ~(pmap_nesting_size_min - 1);
8849 local_end = local_start + pmap_nesting_size_min;
8850 /*
8851 * ... but don't go beyond the "old_start" to "old_end"
8852 * range, to avoid spanning over another VM region
8853 * with a possibly different VM object and/or offset.
8854 */
8855 if (local_start < old_start) {
8856 local_start = old_start;
8857 }
8858 if (local_end > old_end) {
8859 local_end = old_end;
8860 }
8861 /*
8862 * Adjust copy_offset to the start of the range.
8863 */
8864 copy_offset -= (vaddr - local_start);
8865
8866 vm_map_clip_start(map, entry, local_start);
8867 vm_map_clip_end(map, entry, local_end);
8868 /* unnesting was done in vm_map_clip_start/end() */
8869 assert(!entry->use_pmap);
8870
8871 /* substitute copy object for */
8872 /* shared map entry */
8873 vm_map_deallocate(entry->object.sub_map);
8874 entry->is_sub_map = FALSE;
8875 entry->object.vm_object = copy_object;
8876
8877 /* propagate the submap entry's protections */
8878 entry->protection |= submap_entry->protection;
8879 entry->max_protection |= submap_entry->max_protection;
8880
8881 if(copied_slowly) {
8882 entry->offset = local_start - old_start;
8883 entry->needs_copy = FALSE;
8884 entry->is_shared = FALSE;
8885 } else {
8886 entry->offset = copy_offset;
8887 entry->needs_copy = TRUE;
8888 if(entry->inheritance == VM_INHERIT_SHARE)
8889 entry->inheritance = VM_INHERIT_COPY;
8890 if (map != old_map)
8891 entry->is_shared = TRUE;
8892 }
8893 if(entry->inheritance == VM_INHERIT_SHARE)
8894 entry->inheritance = VM_INHERIT_COPY;
8895
8896 vm_map_lock_write_to_read(map);
8897 } else {
8898 if((cow_sub_map_parent)
8899 && (cow_sub_map_parent != *real_map)
8900 && (cow_sub_map_parent != map)) {
8901 vm_map_unlock(cow_sub_map_parent);
8902 }
8903 entry = submap_entry;
8904 vaddr = local_vaddr;
8905 }
8906 }
8907
8908 /*
8909 * Check whether this task is allowed to have
8910 * this page.
8911 */
8912
8913 prot = entry->protection;
8914
8915 if (override_nx(map, entry->alias) && prot) {
8916 /*
8917 * HACK -- if not a stack, then allow execution
8918 */
8919 prot |= VM_PROT_EXECUTE;
8920 }
8921
8922 if (mask_protections) {
8923 fault_type &= prot;
8924 if (fault_type == VM_PROT_NONE) {
8925 goto protection_failure;
8926 }
8927 }
8928 if ((fault_type & (prot)) != fault_type) {
8929 protection_failure:
8930 if (*real_map != map) {
8931 vm_map_unlock(*real_map);
8932 }
8933 *real_map = map;
8934
8935 if ((fault_type & VM_PROT_EXECUTE) && prot)
8936 log_stack_execution_failure((addr64_t)vaddr, prot);
8937
8938 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
8939 return KERN_PROTECTION_FAILURE;
8940 }
8941
8942 /*
8943 * If this page is not pageable, we have to get
8944 * it for all possible accesses.
8945 */
8946
8947 *wired = (entry->wired_count != 0);
8948 if (*wired)
8949 fault_type = prot;
8950
8951 /*
8952 * If the entry was copy-on-write, we either ...
8953 */
8954
8955 if (entry->needs_copy) {
8956 /*
8957 * If we want to write the page, we may as well
8958 * handle that now since we've got the map locked.
8959 *
8960 * If we don't need to write the page, we just
8961 * demote the permissions allowed.
8962 */
8963
8964 if ((fault_type & VM_PROT_WRITE) || *wired) {
8965 /*
8966 * Make a new object, and place it in the
8967 * object chain. Note that no new references
8968 * have appeared -- one just moved from the
8969 * map to the new object.
8970 */
8971
8972 if (vm_map_lock_read_to_write(map)) {
8973 vm_map_lock_read(map);
8974 goto RetryLookup;
8975 }
8976 vm_object_shadow(&entry->object.vm_object,
8977 &entry->offset,
8978 (vm_map_size_t) (entry->vme_end -
8979 entry->vme_start));
8980
8981 entry->object.vm_object->shadowed = TRUE;
8982 entry->needs_copy = FALSE;
8983 vm_map_lock_write_to_read(map);
8984 }
8985 else {
8986 /*
8987 * We're attempting to read a copy-on-write
8988 * page -- don't allow writes.
8989 */
8990
8991 prot &= (~VM_PROT_WRITE);
8992 }
8993 }
8994
8995 /*
8996 * Create an object if necessary.
8997 */
8998 if (entry->object.vm_object == VM_OBJECT_NULL) {
8999
9000 if (vm_map_lock_read_to_write(map)) {
9001 vm_map_lock_read(map);
9002 goto RetryLookup;
9003 }
9004
9005 entry->object.vm_object = vm_object_allocate(
9006 (vm_map_size_t)(entry->vme_end - entry->vme_start));
9007 entry->offset = 0;
9008 vm_map_lock_write_to_read(map);
9009 }
9010
9011 /*
9012 * Return the object/offset from this entry. If the entry
9013 * was copy-on-write or empty, it has been fixed up. Also
9014 * return the protection.
9015 */
9016
9017 *offset = (vaddr - entry->vme_start) + entry->offset;
9018 *object = entry->object.vm_object;
9019 *out_prot = prot;
9020
9021 if (fault_info) {
9022 fault_info->interruptible = THREAD_UNINT; /* for now... */
9023 /* ... the caller will change "interruptible" if needed */
9024 fault_info->cluster_size = 0;
9025 fault_info->user_tag = entry->alias;
9026 fault_info->behavior = entry->behavior;
9027 fault_info->lo_offset = entry->offset;
9028 fault_info->hi_offset = (entry->vme_end - entry->vme_start) + entry->offset;
9029 fault_info->no_cache = entry->no_cache;
9030 fault_info->stealth = FALSE;
9031 fault_info->io_sync = FALSE;
9032 fault_info->cs_bypass = (entry->used_for_jit)? TRUE : FALSE;
9033 fault_info->mark_zf_absent = FALSE;
9034 }
9035
9036 /*
9037 * Lock the object to prevent it from disappearing
9038 */
9039 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
9040 vm_object_lock(*object);
9041 else
9042 vm_object_lock_shared(*object);
9043
9044 /*
9045 * Save the version number
9046 */
9047
9048 out_version->main_timestamp = map->timestamp;
9049
9050 return KERN_SUCCESS;
9051 }
9052
9053
9054 /*
9055 * vm_map_verify:
9056 *
9057 * Verifies that the map in question has not changed
9058 * since the given version. If successful, the map
9059 * will not change until vm_map_verify_done() is called.
9060 */
9061 boolean_t
9062 vm_map_verify(
9063 register vm_map_t map,
9064 register vm_map_version_t *version) /* REF */
9065 {
9066 boolean_t result;
9067
9068 vm_map_lock_read(map);
9069 result = (map->timestamp == version->main_timestamp);
9070
9071 if (!result)
9072 vm_map_unlock_read(map);
9073
9074 return(result);
9075 }
9076
9077 /*
9078 * vm_map_verify_done:
9079 *
9080 * Releases locks acquired by a vm_map_verify.
9081 *
9082 * This is now a macro in vm/vm_map.h. It does a
9083 * vm_map_unlock_read on the map.
9084 */
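
/*
 * Illustrative sketch, not part of xnu: the lookup/verify pattern that
 * vm_map_lookup_locked(), vm_map_verify() and vm_map_verify_done() are
 * meant for, in the style of a page-fault handler.  The function name
 * example_fault_lookup and the abbreviated lock and error handling are
 * hypothetical; real callers such as vm_fault() also keep working with
 * the returned object and handle submaps, wiring and copy-on-write.
 */
#if 0 /* example only -- not compiled */
static kern_return_t
example_fault_lookup(
	vm_map_t	map,
	vm_map_offset_t	vaddr,
	vm_prot_t	fault_type)
{
	vm_map_version_t	version;
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_prot_t		prot;
	boolean_t		wired;
	vm_map_t		real_map;
	kern_return_t		kr;

	vm_map_lock_read(map);
	kr = vm_map_lookup_locked(&map, vaddr, fault_type,
				  OBJECT_LOCK_EXCLUSIVE,
				  &version, &object, &offset,
				  &prot, &wired, NULL, &real_map);
	if (kr != KERN_SUCCESS) {
		vm_map_unlock_read(map);
		return kr;
	}
	if (real_map != map)
		vm_map_unlock(real_map);
	vm_object_unlock(object);
	vm_map_unlock_read(map);

	/* ... blocking work (page-in, zero-fill, ...) happens here ... */

	/* re-validate: has the map changed since "version" was taken? */
	if (!vm_map_verify(map, &version)) {
		/* it did: the caller must redo the lookup */
		return KERN_ABORTED;
	}
	/* it did not: map is read-locked again; release when done */
	vm_map_verify_done(map, &version);
	return KERN_SUCCESS;
}
#endif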
9085
9086
9087 /*
9088 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
9089 * Goes away after regular vm_region_recurse function migrates to
9090 * 64 bits
9091 * vm_region_recurse: A form of vm_region which follows the
9092 * submaps in a target map
9093 *
9094 */
9095
9096 kern_return_t
9097 vm_map_region_recurse_64(
9098 vm_map_t map,
9099 vm_map_offset_t *address, /* IN/OUT */
9100 vm_map_size_t *size, /* OUT */
9101 natural_t *nesting_depth, /* IN/OUT */
9102 vm_region_submap_info_64_t submap_info, /* IN/OUT */
9103 mach_msg_type_number_t *count) /* IN/OUT */
9104 {
9105 vm_region_extended_info_data_t extended;
9106 vm_map_entry_t tmp_entry;
9107 vm_map_offset_t user_address;
9108 unsigned int user_max_depth;
9109
9110 /*
9111 * "curr_entry" is the VM map entry preceding or including the
9112 * address we're looking for.
9113 * "curr_map" is the map or sub-map containing "curr_entry".
9114 * "curr_address" is the equivalent of the top map's "user_address"
9115 * in the current map.
9116 * "curr_offset" is the cumulated offset of "curr_map" in the
9117 * target task's address space.
9118 * "curr_depth" is the depth of "curr_map" in the chain of
9119 * sub-maps.
9120 *
9121 * "curr_max_below" and "curr_max_above" limit the range (around
9122 * "curr_address") we should take into account in the current (sub)map.
9123 * They limit the range to what's visible through the map entries
9124 * we've traversed from the top map to the current map.
9125 *
9126 */
9127 vm_map_entry_t curr_entry;
9128 vm_map_address_t curr_address;
9129 vm_map_offset_t curr_offset;
9130 vm_map_t curr_map;
9131 unsigned int curr_depth;
9132 vm_map_offset_t curr_max_below, curr_max_above;
9133 vm_map_offset_t curr_skip;
9134
9135 /*
9136 * "next_" is the same as "curr_" but for the VM region immediately
9137 * after the address we're looking for. We need to keep track of this
9138 * too because we want to return info about that region if the
9139 * address we're looking for is not mapped.
9140 */
9141 vm_map_entry_t next_entry;
9142 vm_map_offset_t next_offset;
9143 vm_map_offset_t next_address;
9144 vm_map_t next_map;
9145 unsigned int next_depth;
9146 vm_map_offset_t next_max_below, next_max_above;
9147 vm_map_offset_t next_skip;
9148
9149 boolean_t look_for_pages;
9150 vm_region_submap_short_info_64_t short_info;
9151
9152 if (map == VM_MAP_NULL) {
9153 /* no address space to work on */
9154 return KERN_INVALID_ARGUMENT;
9155 }
9156
9157 if (*count < VM_REGION_SUBMAP_INFO_COUNT_64) {
9158 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
9159 /*
9160 * "info" structure is not big enough and
9161 * would overflow
9162 */
9163 return KERN_INVALID_ARGUMENT;
9164 } else {
9165 look_for_pages = FALSE;
9166 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
9167 short_info = (vm_region_submap_short_info_64_t) submap_info;
9168 submap_info = NULL;
9169 }
9170 } else {
9171 look_for_pages = TRUE;
9172 *count = VM_REGION_SUBMAP_INFO_COUNT_64;
9173 short_info = NULL;
9174 }
9175
9176
9177 user_address = *address;
9178 user_max_depth = *nesting_depth;
9179
9180 curr_entry = NULL;
9181 curr_map = map;
9182 curr_address = user_address;
9183 curr_offset = 0;
9184 curr_skip = 0;
9185 curr_depth = 0;
9186 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
9187 curr_max_below = curr_address;
9188
9189 next_entry = NULL;
9190 next_map = NULL;
9191 next_address = 0;
9192 next_offset = 0;
9193 next_skip = 0;
9194 next_depth = 0;
9195 next_max_above = (vm_map_offset_t) -1;
9196 next_max_below = (vm_map_offset_t) -1;
9197
9198 if (not_in_kdp) {
9199 vm_map_lock_read(curr_map);
9200 }
9201
9202 for (;;) {
9203 if (vm_map_lookup_entry(curr_map,
9204 curr_address,
9205 &tmp_entry)) {
9206 /* tmp_entry contains the address we're looking for */
9207 curr_entry = tmp_entry;
9208 } else {
9209 vm_map_offset_t skip;
9210 /*
9211 * The address is not mapped. "tmp_entry" is the
9212 * map entry preceding the address. We want the next
9213 * one, if it exists.
9214 */
9215 curr_entry = tmp_entry->vme_next;
9216
9217 if (curr_entry == vm_map_to_entry(curr_map) ||
9218 (curr_entry->vme_start >=
9219 curr_address + curr_max_above)) {
9220 /* no next entry at this level: stop looking */
9221 if (not_in_kdp) {
9222 vm_map_unlock_read(curr_map);
9223 }
9224 curr_entry = NULL;
9225 curr_map = NULL;
9226 curr_offset = 0;
9227 curr_depth = 0;
9228 curr_max_above = 0;
9229 curr_max_below = 0;
9230 break;
9231 }
9232
9233 /* adjust current address and offset */
9234 skip = curr_entry->vme_start - curr_address;
9235 curr_address = curr_entry->vme_start;
9236 curr_skip = skip;
9237 curr_offset += skip;
9238 curr_max_above -= skip;
9239 curr_max_below = 0;
9240 }
9241
9242 /*
9243 * Is the next entry at this level closer to the address (or
9244 * deeper in the submap chain) than the one we had
9245 * so far ?
9246 */
9247 tmp_entry = curr_entry->vme_next;
9248 if (tmp_entry == vm_map_to_entry(curr_map)) {
9249 /* no next entry at this level */
9250 } else if (tmp_entry->vme_start >=
9251 curr_address + curr_max_above) {
9252 /*
9253 * tmp_entry is beyond the scope of what we mapped of
9254 * this submap in the upper level: ignore it.
9255 */
9256 } else if ((next_entry == NULL) ||
9257 (tmp_entry->vme_start + curr_offset <=
9258 next_entry->vme_start + next_offset)) {
9259 /*
9260 * We didn't have a "next_entry" or this one is
9261 * closer to the address we're looking for:
9262 * use this "tmp_entry" as the new "next_entry".
9263 */
9264 if (next_entry != NULL) {
9265 /* unlock the last "next_map" */
9266 if (next_map != curr_map && not_in_kdp) {
9267 vm_map_unlock_read(next_map);
9268 }
9269 }
9270 next_entry = tmp_entry;
9271 next_map = curr_map;
9272 next_depth = curr_depth;
9273 next_address = next_entry->vme_start;
9274 next_skip = curr_skip;
9275 next_offset = curr_offset;
9276 next_offset += (next_address - curr_address);
9277 next_max_above = MIN(next_max_above, curr_max_above);
9278 next_max_above = MIN(next_max_above,
9279 next_entry->vme_end - next_address);
9280 next_max_below = MIN(next_max_below, curr_max_below);
9281 next_max_below = MIN(next_max_below,
9282 next_address - next_entry->vme_start);
9283 }
9284
9285 /*
9286 * "curr_max_{above,below}" allow us to keep track of the
9287 * portion of the submap that is actually mapped at this level:
9288 * the rest of that submap is irrelevant to us, since it's not
9289 * mapped here.
9290 * The relevant portion of the map starts at
9291 * "curr_entry->offset" up to the size of "curr_entry".
9292 */
9293 curr_max_above = MIN(curr_max_above,
9294 curr_entry->vme_end - curr_address);
9295 curr_max_below = MIN(curr_max_below,
9296 curr_address - curr_entry->vme_start);
9297
9298 if (!curr_entry->is_sub_map ||
9299 curr_depth >= user_max_depth) {
9300 /*
9301 * We hit a leaf map or we reached the maximum depth
9302 * we could, so stop looking. Keep the current map
9303 * locked.
9304 */
9305 break;
9306 }
9307
9308 /*
9309 * Get down to the next submap level.
9310 */
9311
9312 /*
9313 * Lock the next level and unlock the current level,
9314 * unless we need to keep it locked to access the "next_entry"
9315 * later.
9316 */
9317 if (not_in_kdp) {
9318 vm_map_lock_read(curr_entry->object.sub_map);
9319 }
9320 if (curr_map == next_map) {
9321 /* keep "next_map" locked in case we need it */
9322 } else {
9323 /* release this map */
9324 if (not_in_kdp)
9325 vm_map_unlock_read(curr_map);
9326 }
9327
9328 /*
9329 * Adjust the offset. "curr_entry" maps the submap
9330 * at relative address "curr_entry->vme_start" in the
9331 * curr_map but skips the first "curr_entry->offset"
9332 * bytes of the submap.
9333 * "curr_offset" always represents the offset of a virtual
9334 * address in the curr_map relative to the absolute address
9335 * space (i.e. the top-level VM map).
9336 */
9337 curr_offset +=
9338 (curr_entry->offset - curr_entry->vme_start);
9339 curr_address = user_address + curr_offset;
9340 /* switch to the submap */
9341 curr_map = curr_entry->object.sub_map;
9342 curr_depth++;
9343 curr_entry = NULL;
9344 }
9345
9346 if (curr_entry == NULL) {
9347 /* no VM region contains the address... */
9348 if (next_entry == NULL) {
9349 /* ... and no VM region follows it either */
9350 return KERN_INVALID_ADDRESS;
9351 }
9352 /* ... gather info about the next VM region */
9353 curr_entry = next_entry;
9354 curr_map = next_map; /* still locked ... */
9355 curr_address = next_address;
9356 curr_skip = next_skip;
9357 curr_offset = next_offset;
9358 curr_depth = next_depth;
9359 curr_max_above = next_max_above;
9360 curr_max_below = next_max_below;
9361 if (curr_map == map) {
9362 user_address = curr_address;
9363 }
9364 } else {
9365 /* we won't need "next_entry" after all */
9366 if (next_entry != NULL) {
9367 /* release "next_map" */
9368 if (next_map != curr_map && not_in_kdp) {
9369 vm_map_unlock_read(next_map);
9370 }
9371 }
9372 }
9373 next_entry = NULL;
9374 next_map = NULL;
9375 next_offset = 0;
9376 next_skip = 0;
9377 next_depth = 0;
9378 next_max_below = -1;
9379 next_max_above = -1;
9380
9381 *nesting_depth = curr_depth;
9382 *size = curr_max_above + curr_max_below;
9383 *address = user_address + curr_skip - curr_max_below;
9384
9385 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
9386 // so probably should be a real 32b ID vs. ptr.
9387 // Current users just check for equality
9388 #define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)p)
9389
9390 if (look_for_pages) {
9391 submap_info->user_tag = curr_entry->alias;
9392 submap_info->offset = curr_entry->offset;
9393 submap_info->protection = curr_entry->protection;
9394 submap_info->inheritance = curr_entry->inheritance;
9395 submap_info->max_protection = curr_entry->max_protection;
9396 submap_info->behavior = curr_entry->behavior;
9397 submap_info->user_wired_count = curr_entry->user_wired_count;
9398 submap_info->is_submap = curr_entry->is_sub_map;
9399 submap_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
9400 } else {
9401 short_info->user_tag = curr_entry->alias;
9402 short_info->offset = curr_entry->offset;
9403 short_info->protection = curr_entry->protection;
9404 short_info->inheritance = curr_entry->inheritance;
9405 short_info->max_protection = curr_entry->max_protection;
9406 short_info->behavior = curr_entry->behavior;
9407 short_info->user_wired_count = curr_entry->user_wired_count;
9408 short_info->is_submap = curr_entry->is_sub_map;
9409 short_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
9410 }
9411
9412 extended.pages_resident = 0;
9413 extended.pages_swapped_out = 0;
9414 extended.pages_shared_now_private = 0;
9415 extended.pages_dirtied = 0;
9416 extended.external_pager = 0;
9417 extended.shadow_depth = 0;
9418
9419 if (not_in_kdp) {
9420 if (!curr_entry->is_sub_map) {
9421 vm_map_offset_t range_start, range_end;
9422 range_start = MAX((curr_address - curr_max_below),
9423 curr_entry->vme_start);
9424 range_end = MIN((curr_address + curr_max_above),
9425 curr_entry->vme_end);
9426 vm_map_region_walk(curr_map,
9427 range_start,
9428 curr_entry,
9429 (curr_entry->offset +
9430 (range_start -
9431 curr_entry->vme_start)),
9432 range_end - range_start,
9433 &extended,
9434 look_for_pages);
9435 if (extended.external_pager &&
9436 extended.ref_count == 2 &&
9437 extended.share_mode == SM_SHARED) {
9438 extended.share_mode = SM_PRIVATE;
9439 }
9440 } else {
9441 if (curr_entry->use_pmap) {
9442 extended.share_mode = SM_TRUESHARED;
9443 } else {
9444 extended.share_mode = SM_PRIVATE;
9445 }
9446 extended.ref_count =
9447 curr_entry->object.sub_map->ref_count;
9448 }
9449 }
9450
9451 if (look_for_pages) {
9452 submap_info->pages_resident = extended.pages_resident;
9453 submap_info->pages_swapped_out = extended.pages_swapped_out;
9454 submap_info->pages_shared_now_private =
9455 extended.pages_shared_now_private;
9456 submap_info->pages_dirtied = extended.pages_dirtied;
9457 submap_info->external_pager = extended.external_pager;
9458 submap_info->shadow_depth = extended.shadow_depth;
9459 submap_info->share_mode = extended.share_mode;
9460 submap_info->ref_count = extended.ref_count;
9461 } else {
9462 short_info->external_pager = extended.external_pager;
9463 short_info->shadow_depth = extended.shadow_depth;
9464 short_info->share_mode = extended.share_mode;
9465 short_info->ref_count = extended.ref_count;
9466 }
9467
9468 if (not_in_kdp) {
9469 vm_map_unlock_read(curr_map);
9470 }
9471
9472 return KERN_SUCCESS;
9473 }
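
/*
 * Illustrative user-space sketch, not part of xnu: walking a task's
 * address space with mach_vm_region_recurse(), which is backed by
 * vm_map_region_recurse_64() above.  Starting at depth 0 and the
 * printf() format are arbitrary choices for the example.
 */
#if 0 /* example only -- user-space code, not compiled */
#include <stdio.h>
#include <mach/mach.h>
#include <mach/mach_vm.h>

static void
example_walk_regions(task_t task)
{
	mach_vm_address_t		address = 0;
	mach_vm_size_t			size = 0;
	natural_t			depth = 0;
	vm_region_submap_info_data_64_t	info;
	mach_msg_type_number_t		count;

	for (;;) {
		count = VM_REGION_SUBMAP_INFO_COUNT_64;
		if (mach_vm_region_recurse(task, &address, &size, &depth,
					   (vm_region_recurse_info_t)&info,
					   &count) != KERN_SUCCESS)
			break;	/* KERN_INVALID_ADDRESS: past the last region */

		printf("0x%llx-0x%llx depth=%u prot=%d submap=%d\n",
		       (unsigned long long)address,
		       (unsigned long long)(address + size),
		       depth, info.protection, info.is_submap);

		if (info.is_submap)
			depth++;		/* descend into the nested map */
		else
			address += size;	/* move to the next region */
	}
}
#endif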
9474
9475 /*
9476 * vm_region:
9477 *
9478 * User call to obtain information about a region in
9479 * a task's address map. Currently, only one flavor is
9480 * supported.
9481 *
9482 * XXX The reserved and behavior fields cannot be filled
9483 * in until the vm merge from the IK is completed, and
9484 * vm_reserve is implemented.
9485 */
9486
9487 kern_return_t
9488 vm_map_region(
9489 vm_map_t map,
9490 vm_map_offset_t *address, /* IN/OUT */
9491 vm_map_size_t *size, /* OUT */
9492 vm_region_flavor_t flavor, /* IN */
9493 vm_region_info_t info, /* OUT */
9494 mach_msg_type_number_t *count, /* IN/OUT */
9495 mach_port_t *object_name) /* OUT */
9496 {
9497 vm_map_entry_t tmp_entry;
9498 vm_map_entry_t entry;
9499 vm_map_offset_t start;
9500
9501 if (map == VM_MAP_NULL)
9502 return(KERN_INVALID_ARGUMENT);
9503
9504 switch (flavor) {
9505
9506 case VM_REGION_BASIC_INFO:
9507 /* legacy for old 32-bit objects info */
9508 {
9509 vm_region_basic_info_t basic;
9510
9511 if (*count < VM_REGION_BASIC_INFO_COUNT)
9512 return(KERN_INVALID_ARGUMENT);
9513
9514 basic = (vm_region_basic_info_t) info;
9515 *count = VM_REGION_BASIC_INFO_COUNT;
9516
9517 vm_map_lock_read(map);
9518
9519 start = *address;
9520 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9521 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9522 vm_map_unlock_read(map);
9523 return(KERN_INVALID_ADDRESS);
9524 }
9525 } else {
9526 entry = tmp_entry;
9527 }
9528
9529 start = entry->vme_start;
9530
9531 basic->offset = (uint32_t)entry->offset;
9532 basic->protection = entry->protection;
9533 basic->inheritance = entry->inheritance;
9534 basic->max_protection = entry->max_protection;
9535 basic->behavior = entry->behavior;
9536 basic->user_wired_count = entry->user_wired_count;
9537 basic->reserved = entry->is_sub_map;
9538 *address = start;
9539 *size = (entry->vme_end - start);
9540
9541 if (object_name) *object_name = IP_NULL;
9542 if (entry->is_sub_map) {
9543 basic->shared = FALSE;
9544 } else {
9545 basic->shared = entry->is_shared;
9546 }
9547
9548 vm_map_unlock_read(map);
9549 return(KERN_SUCCESS);
9550 }
9551
9552 case VM_REGION_BASIC_INFO_64:
9553 {
9554 vm_region_basic_info_64_t basic;
9555
9556 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
9557 return(KERN_INVALID_ARGUMENT);
9558
9559 basic = (vm_region_basic_info_64_t) info;
9560 *count = VM_REGION_BASIC_INFO_COUNT_64;
9561
9562 vm_map_lock_read(map);
9563
9564 start = *address;
9565 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9566 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9567 vm_map_unlock_read(map);
9568 return(KERN_INVALID_ADDRESS);
9569 }
9570 } else {
9571 entry = tmp_entry;
9572 }
9573
9574 start = entry->vme_start;
9575
9576 basic->offset = entry->offset;
9577 basic->protection = entry->protection;
9578 basic->inheritance = entry->inheritance;
9579 basic->max_protection = entry->max_protection;
9580 basic->behavior = entry->behavior;
9581 basic->user_wired_count = entry->user_wired_count;
9582 basic->reserved = entry->is_sub_map;
9583 *address = start;
9584 *size = (entry->vme_end - start);
9585
9586 if (object_name) *object_name = IP_NULL;
9587 if (entry->is_sub_map) {
9588 basic->shared = FALSE;
9589 } else {
9590 basic->shared = entry->is_shared;
9591 }
9592
9593 vm_map_unlock_read(map);
9594 return(KERN_SUCCESS);
9595 }
9596 case VM_REGION_EXTENDED_INFO:
9597 {
9598 vm_region_extended_info_t extended;
9599
9600 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
9601 return(KERN_INVALID_ARGUMENT);
9602
9603 extended = (vm_region_extended_info_t) info;
9604 *count = VM_REGION_EXTENDED_INFO_COUNT;
9605
9606 vm_map_lock_read(map);
9607
9608 start = *address;
9609 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9610 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9611 vm_map_unlock_read(map);
9612 return(KERN_INVALID_ADDRESS);
9613 }
9614 } else {
9615 entry = tmp_entry;
9616 }
9617 start = entry->vme_start;
9618
9619 extended->protection = entry->protection;
9620 extended->user_tag = entry->alias;
9621 extended->pages_resident = 0;
9622 extended->pages_swapped_out = 0;
9623 extended->pages_shared_now_private = 0;
9624 extended->pages_dirtied = 0;
9625 extended->external_pager = 0;
9626 extended->shadow_depth = 0;
9627
9628 vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended, TRUE);
9629
9630 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
9631 extended->share_mode = SM_PRIVATE;
9632
9633 if (object_name)
9634 *object_name = IP_NULL;
9635 *address = start;
9636 *size = (entry->vme_end - start);
9637
9638 vm_map_unlock_read(map);
9639 return(KERN_SUCCESS);
9640 }
9641 case VM_REGION_TOP_INFO:
9642 {
9643 vm_region_top_info_t top;
9644
9645 if (*count < VM_REGION_TOP_INFO_COUNT)
9646 return(KERN_INVALID_ARGUMENT);
9647
9648 top = (vm_region_top_info_t) info;
9649 *count = VM_REGION_TOP_INFO_COUNT;
9650
9651 vm_map_lock_read(map);
9652
9653 start = *address;
9654 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9655 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9656 vm_map_unlock_read(map);
9657 return(KERN_INVALID_ADDRESS);
9658 }
9659 } else {
9660 entry = tmp_entry;
9661
9662 }
9663 start = entry->vme_start;
9664
9665 top->private_pages_resident = 0;
9666 top->shared_pages_resident = 0;
9667
9668 vm_map_region_top_walk(entry, top);
9669
9670 if (object_name)
9671 *object_name = IP_NULL;
9672 *address = start;
9673 *size = (entry->vme_end - start);
9674
9675 vm_map_unlock_read(map);
9676 return(KERN_SUCCESS);
9677 }
9678 default:
9679 return(KERN_INVALID_ARGUMENT);
9680 }
9681 }
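
/*
 * Illustrative user-space sketch, not part of xnu: querying the region
 * containing an address with mach_vm_region() and the
 * VM_REGION_BASIC_INFO_64 flavor handled by vm_map_region() above.
 * The function name is hypothetical; object_name comes back as
 * MACH_PORT_NULL, since this implementation returns IP_NULL.
 */
#if 0 /* example only -- user-space code, not compiled */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
example_query_region(task_t task, mach_vm_address_t addr)
{
	mach_vm_address_t		address = addr;
	mach_vm_size_t			size = 0;
	vm_region_basic_info_data_64_t	info;
	mach_msg_type_number_t		count = VM_REGION_BASIC_INFO_COUNT_64;
	mach_port_t			object_name = MACH_PORT_NULL;
	kern_return_t			kr;

	kr = mach_vm_region(task, &address, &size,
			    VM_REGION_BASIC_INFO_64,
			    (vm_region_info_t)&info,
			    &count, &object_name);
	if (kr == KERN_SUCCESS) {
		/* "address" may have been rounded up to the start of the
		 * region at or after "addr"; "size" covers that region */
	}
	return kr;
}
#endif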
9682
9683 #define OBJ_RESIDENT_COUNT(obj, entry_size) \
9684 MIN((entry_size), \
9685 ((obj)->all_reusable ? \
9686 (obj)->wired_page_count : \
9687 (obj)->resident_page_count - (obj)->reusable_page_count))
9688
9689 void
9690 vm_map_region_top_walk(
9691 vm_map_entry_t entry,
9692 vm_region_top_info_t top)
9693 {
9694
9695 if (entry->object.vm_object == 0 || entry->is_sub_map) {
9696 top->share_mode = SM_EMPTY;
9697 top->ref_count = 0;
9698 top->obj_id = 0;
9699 return;
9700 }
9701
9702 {
9703 struct vm_object *obj, *tmp_obj;
9704 int ref_count;
9705 uint32_t entry_size;
9706
9707 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
9708
9709 obj = entry->object.vm_object;
9710
9711 vm_object_lock(obj);
9712
9713 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9714 ref_count--;
9715
9716 assert(obj->reusable_page_count <= obj->resident_page_count);
9717 if (obj->shadow) {
9718 if (ref_count == 1)
9719 top->private_pages_resident =
9720 OBJ_RESIDENT_COUNT(obj, entry_size);
9721 else
9722 top->shared_pages_resident =
9723 OBJ_RESIDENT_COUNT(obj, entry_size);
9724 top->ref_count = ref_count;
9725 top->share_mode = SM_COW;
9726
9727 while ((tmp_obj = obj->shadow)) {
9728 vm_object_lock(tmp_obj);
9729 vm_object_unlock(obj);
9730 obj = tmp_obj;
9731
9732 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9733 ref_count--;
9734
9735 assert(obj->reusable_page_count <= obj->resident_page_count);
9736 top->shared_pages_resident +=
9737 OBJ_RESIDENT_COUNT(obj, entry_size);
9738 top->ref_count += ref_count - 1;
9739 }
9740 } else {
9741 if (entry->superpage_size) {
9742 top->share_mode = SM_LARGE_PAGE;
9743 top->shared_pages_resident = 0;
9744 top->private_pages_resident = entry_size;
9745 } else if (entry->needs_copy) {
9746 top->share_mode = SM_COW;
9747 top->shared_pages_resident =
9748 OBJ_RESIDENT_COUNT(obj, entry_size);
9749 } else {
9750 if (ref_count == 1 ||
9751 (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
9752 top->share_mode = SM_PRIVATE;
9753 top->private_pages_resident =
9754 OBJ_RESIDENT_COUNT(obj,
9755 entry_size);
9756 } else {
9757 top->share_mode = SM_SHARED;
9758 top->shared_pages_resident =
9759 OBJ_RESIDENT_COUNT(obj,
9760 entry_size);
9761 }
9762 }
9763 top->ref_count = ref_count;
9764 }
9765 /* XXX K64: obj_id will be truncated */
9766 top->obj_id = (unsigned int) (uintptr_t)obj;
9767
9768 vm_object_unlock(obj);
9769 }
9770 }
9771
9772 void
9773 vm_map_region_walk(
9774 vm_map_t map,
9775 vm_map_offset_t va,
9776 vm_map_entry_t entry,
9777 vm_object_offset_t offset,
9778 vm_object_size_t range,
9779 vm_region_extended_info_t extended,
9780 boolean_t look_for_pages)
9781 {
9782 register struct vm_object *obj, *tmp_obj;
9783 register vm_map_offset_t last_offset;
9784 register int i;
9785 register int ref_count;
9786 struct vm_object *shadow_object;
9787 int shadow_depth;
9788
9789 if ((entry->object.vm_object == 0) ||
9790 (entry->is_sub_map) ||
9791 (entry->object.vm_object->phys_contiguous &&
9792 !entry->superpage_size)) {
9793 extended->share_mode = SM_EMPTY;
9794 extended->ref_count = 0;
9795 return;
9796 }
9797
9798 if (entry->superpage_size) {
9799 extended->shadow_depth = 0;
9800 extended->share_mode = SM_LARGE_PAGE;
9801 extended->ref_count = 1;
9802 extended->external_pager = 0;
9803 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
9804 extended->shadow_depth = 0;
9805 return;
9806 }
9807
9808 {
9809 obj = entry->object.vm_object;
9810
9811 vm_object_lock(obj);
9812
9813 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9814 ref_count--;
9815
9816 if (look_for_pages) {
9817 for (last_offset = offset + range;
9818 offset < last_offset;
9819 offset += PAGE_SIZE_64, va += PAGE_SIZE)
9820 vm_map_region_look_for_page(map, va, obj,
9821 offset, ref_count,
9822 0, extended);
9823 } else {
9824 shadow_object = obj->shadow;
9825 shadow_depth = 0;
9826
9827 if ( !(obj->pager_trusted) && !(obj->internal))
9828 extended->external_pager = 1;
9829
9830 if (shadow_object != VM_OBJECT_NULL) {
9831 vm_object_lock(shadow_object);
9832 for (;
9833 shadow_object != VM_OBJECT_NULL;
9834 shadow_depth++) {
9835 vm_object_t next_shadow;
9836
9837 if ( !(shadow_object->pager_trusted) &&
9838 !(shadow_object->internal))
9839 extended->external_pager = 1;
9840
9841 next_shadow = shadow_object->shadow;
9842 if (next_shadow) {
9843 vm_object_lock(next_shadow);
9844 }
9845 vm_object_unlock(shadow_object);
9846 shadow_object = next_shadow;
9847 }
9848 }
9849 extended->shadow_depth = shadow_depth;
9850 }
9851
9852 if (extended->shadow_depth || entry->needs_copy)
9853 extended->share_mode = SM_COW;
9854 else {
9855 if (ref_count == 1)
9856 extended->share_mode = SM_PRIVATE;
9857 else {
9858 if (obj->true_share)
9859 extended->share_mode = SM_TRUESHARED;
9860 else
9861 extended->share_mode = SM_SHARED;
9862 }
9863 }
9864 extended->ref_count = ref_count - extended->shadow_depth;
9865
9866 for (i = 0; i < extended->shadow_depth; i++) {
9867 if ((tmp_obj = obj->shadow) == 0)
9868 break;
9869 vm_object_lock(tmp_obj);
9870 vm_object_unlock(obj);
9871
9872 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
9873 ref_count--;
9874
9875 extended->ref_count += ref_count;
9876 obj = tmp_obj;
9877 }
9878 vm_object_unlock(obj);
9879
9880 if (extended->share_mode == SM_SHARED) {
9881 register vm_map_entry_t cur;
9882 register vm_map_entry_t last;
9883 int my_refs;
9884
9885 obj = entry->object.vm_object;
9886 last = vm_map_to_entry(map);
9887 my_refs = 0;
9888
9889 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9890 ref_count--;
9891 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
9892 my_refs += vm_map_region_count_obj_refs(cur, obj);
9893
9894 if (my_refs == ref_count)
9895 extended->share_mode = SM_PRIVATE_ALIASED;
9896 else if (my_refs > 1)
9897 extended->share_mode = SM_SHARED_ALIASED;
9898 }
9899 }
9900 }
9901
9902
9903 /* object is locked on entry and locked on return */
9904
9905
9906 static void
9907 vm_map_region_look_for_page(
9908 __unused vm_map_t map,
9909 __unused vm_map_offset_t va,
9910 vm_object_t object,
9911 vm_object_offset_t offset,
9912 int max_refcnt,
9913 int depth,
9914 vm_region_extended_info_t extended)
9915 {
9916 register vm_page_t p;
9917 register vm_object_t shadow;
9918 register int ref_count;
9919 vm_object_t caller_object;
9920 #if MACH_PAGEMAP
9921 kern_return_t kr;
9922 #endif
9923 shadow = object->shadow;
9924 caller_object = object;
9925
9926
9927 while (TRUE) {
9928
9929 if ( !(object->pager_trusted) && !(object->internal))
9930 extended->external_pager = 1;
9931
9932 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
9933 if (shadow && (max_refcnt == 1))
9934 extended->pages_shared_now_private++;
9935
9936 if (!p->fictitious &&
9937 (p->dirty || pmap_is_modified(p->phys_page)))
9938 extended->pages_dirtied++;
9939
9940 extended->pages_resident++;
9941
9942 if(object != caller_object)
9943 vm_object_unlock(object);
9944
9945 return;
9946 }
9947 #if MACH_PAGEMAP
9948 if (object->existence_map) {
9949 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) {
9950
9951 extended->pages_swapped_out++;
9952
9953 if(object != caller_object)
9954 vm_object_unlock(object);
9955
9956 return;
9957 }
9958 } else if (object->internal &&
9959 object->alive &&
9960 !object->terminating &&
9961 object->pager_ready) {
9962
9963 memory_object_t pager;
9964
9965 vm_object_paging_begin(object);
9966 pager = object->pager;
9967 vm_object_unlock(object);
9968
9969 kr = memory_object_data_request(
9970 pager,
9971 offset + object->paging_offset,
9972 0, /* just poke the pager */
9973 VM_PROT_READ,
9974 NULL);
9975
9976 vm_object_lock(object);
9977 vm_object_paging_end(object);
9978
9979 if (kr == KERN_SUCCESS) {
9980 /* the pager has that page */
9981 extended->pages_swapped_out++;
9982 if (object != caller_object)
9983 vm_object_unlock(object);
9984 return;
9985 }
9986 }
9987 #endif /* MACH_PAGEMAP */
9988
9989 if (shadow) {
9990 vm_object_lock(shadow);
9991
9992 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
9993 ref_count--;
9994
9995 if (++depth > extended->shadow_depth)
9996 extended->shadow_depth = depth;
9997
9998 if (ref_count > max_refcnt)
9999 max_refcnt = ref_count;
10000
10001 if(object != caller_object)
10002 vm_object_unlock(object);
10003
10004 offset = offset + object->vo_shadow_offset;
10005 object = shadow;
10006 shadow = object->shadow;
10007 continue;
10008 }
10009 if(object != caller_object)
10010 vm_object_unlock(object);
10011 break;
10012 }
10013 }
10014
10015 static int
10016 vm_map_region_count_obj_refs(
10017 vm_map_entry_t entry,
10018 vm_object_t object)
10019 {
10020 register int ref_count;
10021 register vm_object_t chk_obj;
10022 register vm_object_t tmp_obj;
10023
10024 if (entry->object.vm_object == 0)
10025 return(0);
10026
10027 if (entry->is_sub_map)
10028 return(0);
10029 else {
10030 ref_count = 0;
10031
10032 chk_obj = entry->object.vm_object;
10033 vm_object_lock(chk_obj);
10034
10035 while (chk_obj) {
10036 if (chk_obj == object)
10037 ref_count++;
10038 tmp_obj = chk_obj->shadow;
10039 if (tmp_obj)
10040 vm_object_lock(tmp_obj);
10041 vm_object_unlock(chk_obj);
10042
10043 chk_obj = tmp_obj;
10044 }
10045 }
10046 return(ref_count);
10047 }
10048
10049
10050 /*
10051 * Routine: vm_map_simplify
10052 *
10053 * Description:
10054 * Attempt to simplify the map representation in
10055 * the vicinity of the given starting address.
10056 * Note:
10057 * This routine is intended primarily to keep the
10058 * kernel maps more compact -- they generally don't
10059 * benefit from the "expand a map entry" technology
10060 * at allocation time because the adjacent entry
10061 * is often wired down.
10062 */
10063 void
10064 vm_map_simplify_entry(
10065 vm_map_t map,
10066 vm_map_entry_t this_entry)
10067 {
10068 vm_map_entry_t prev_entry;
10069
10070 counter(c_vm_map_simplify_entry_called++);
10071
10072 prev_entry = this_entry->vme_prev;
10073
10074 if ((this_entry != vm_map_to_entry(map)) &&
10075 (prev_entry != vm_map_to_entry(map)) &&
10076
10077 (prev_entry->vme_end == this_entry->vme_start) &&
10078
10079 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
10080
10081 (prev_entry->object.vm_object == this_entry->object.vm_object) &&
10082 ((prev_entry->offset + (prev_entry->vme_end -
10083 prev_entry->vme_start))
10084 == this_entry->offset) &&
10085
10086 (prev_entry->inheritance == this_entry->inheritance) &&
10087 (prev_entry->protection == this_entry->protection) &&
10088 (prev_entry->max_protection == this_entry->max_protection) &&
10089 (prev_entry->behavior == this_entry->behavior) &&
10090 (prev_entry->alias == this_entry->alias) &&
10091 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
10092 (prev_entry->no_cache == this_entry->no_cache) &&
10093 (prev_entry->wired_count == this_entry->wired_count) &&
10094 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
10095
10096 (prev_entry->needs_copy == this_entry->needs_copy) &&
10097 (prev_entry->permanent == this_entry->permanent) &&
10098
10099 (prev_entry->use_pmap == FALSE) &&
10100 (this_entry->use_pmap == FALSE) &&
10101 (prev_entry->in_transition == FALSE) &&
10102 (this_entry->in_transition == FALSE) &&
10103 (prev_entry->needs_wakeup == FALSE) &&
10104 (this_entry->needs_wakeup == FALSE) &&
10105 (prev_entry->is_shared == FALSE) &&
10106 (this_entry->is_shared == FALSE)
10107 ) {
10108 _vm_map_store_entry_unlink(&map->hdr, prev_entry);
10109 assert(prev_entry->vme_start < this_entry->vme_end);
10110 this_entry->vme_start = prev_entry->vme_start;
10111 this_entry->offset = prev_entry->offset;
10112 if (prev_entry->is_sub_map) {
10113 vm_map_deallocate(prev_entry->object.sub_map);
10114 } else {
10115 vm_object_deallocate(prev_entry->object.vm_object);
10116 }
10117 vm_map_entry_dispose(map, prev_entry);
10118 SAVE_HINT_MAP_WRITE(map, this_entry);
10119 counter(c_vm_map_simplified++);
10120 }
10121 }
10122
10123 void
10124 vm_map_simplify(
10125 vm_map_t map,
10126 vm_map_offset_t start)
10127 {
10128 vm_map_entry_t this_entry;
10129
10130 vm_map_lock(map);
10131 if (vm_map_lookup_entry(map, start, &this_entry)) {
10132 vm_map_simplify_entry(map, this_entry);
10133 vm_map_simplify_entry(map, this_entry->vme_next);
10134 }
10135 counter(c_vm_map_simplify_called++);
10136 vm_map_unlock(map);
10137 }
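/*
 * Minimal usage sketch: a caller that has just finished editing entries
 * around an address asks the map to re-coalesce them.  The function and
 * variable names prefixed "example_" are hypothetical, not part of the
 * kernel interface.
 */
#if 0	/* illustrative sketch only */
static void
example_simplify_caller(vm_map_t example_map, vm_map_offset_t example_addr)
{
        /* takes the map lock itself and tries to merge adjacent entries */
        vm_map_simplify(example_map, example_addr);
}
#endif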
10138
10139 static void
10140 vm_map_simplify_range(
10141 vm_map_t map,
10142 vm_map_offset_t start,
10143 vm_map_offset_t end)
10144 {
10145 vm_map_entry_t entry;
10146
10147 /*
10148 * The map should be locked (for "write") by the caller.
10149 */
10150
10151 if (start >= end) {
10152 /* invalid address range */
10153 return;
10154 }
10155
10156 start = vm_map_trunc_page(start);
10157 end = vm_map_round_page(end);
10158
10159 if (!vm_map_lookup_entry(map, start, &entry)) {
10160 /* "start" is not mapped and "entry" ends before "start" */
10161 if (entry == vm_map_to_entry(map)) {
10162 /* start with first entry in the map */
10163 entry = vm_map_first_entry(map);
10164 } else {
10165 /* start with next entry */
10166 entry = entry->vme_next;
10167 }
10168 }
10169
10170 while (entry != vm_map_to_entry(map) &&
10171 entry->vme_start <= end) {
10172 /* try and coalesce "entry" with its previous entry */
10173 vm_map_simplify_entry(map, entry);
10174 entry = entry->vme_next;
10175 }
10176 }
10177
10178
10179 /*
10180 * Routine: vm_map_machine_attribute
10181 * Purpose:
10182 * Provide machine-specific attributes to mappings,
10183 * such as cacheability, etc., for machines that provide
10184 * them. NUMA architectures and machines with big/strange
10185 * caches will use this.
10186 * Note:
10187 * Responsibilities for locking and checking are handled here,
10188 * everything else in the pmap module. If any non-volatile
10189 * information must be kept, the pmap module should handle
10190 * it itself. [This assumes that attributes do not
10191 * need to be inherited, which seems ok to me]
10192 */
10193 kern_return_t
10194 vm_map_machine_attribute(
10195 vm_map_t map,
10196 vm_map_offset_t start,
10197 vm_map_offset_t end,
10198 vm_machine_attribute_t attribute,
10199 vm_machine_attribute_val_t* value) /* IN/OUT */
10200 {
10201 kern_return_t ret;
10202 vm_map_size_t sync_size;
10203 vm_map_entry_t entry;
10204
10205 if (start < vm_map_min(map) || end > vm_map_max(map))
10206 return KERN_INVALID_ADDRESS;
10207
10208 /* Figure how much memory we need to flush (in page increments) */
10209 sync_size = end - start;
10210
10211 vm_map_lock(map);
10212
10213 if (attribute != MATTR_CACHE) {
10214 /* If we don't have to find physical addresses, we */
10215 /* don't have to do an explicit traversal here. */
10216 ret = pmap_attribute(map->pmap, start, end-start,
10217 attribute, value);
10218 vm_map_unlock(map);
10219 return ret;
10220 }
10221
10222 ret = KERN_SUCCESS; /* Assume it all worked */
10223
10224 while(sync_size) {
10225 if (vm_map_lookup_entry(map, start, &entry)) {
10226 vm_map_size_t sub_size;
10227 if((entry->vme_end - start) > sync_size) {
10228 sub_size = sync_size;
10229 sync_size = 0;
10230 } else {
10231 sub_size = entry->vme_end - start;
10232 sync_size -= sub_size;
10233 }
10234 if(entry->is_sub_map) {
10235 vm_map_offset_t sub_start;
10236 vm_map_offset_t sub_end;
10237
10238 sub_start = (start - entry->vme_start)
10239 + entry->offset;
10240 sub_end = sub_start + sub_size;
10241 vm_map_machine_attribute(
10242 entry->object.sub_map,
10243 sub_start,
10244 sub_end,
10245 attribute, value);
10246 } else {
10247 if(entry->object.vm_object) {
10248 vm_page_t m;
10249 vm_object_t object;
10250 vm_object_t base_object;
10251 vm_object_t last_object;
10252 vm_object_offset_t offset;
10253 vm_object_offset_t base_offset;
10254 vm_map_size_t range;
10255 range = sub_size;
10256 offset = (start - entry->vme_start)
10257 + entry->offset;
10258 base_offset = offset;
10259 object = entry->object.vm_object;
10260 base_object = object;
10261 last_object = NULL;
10262
10263 vm_object_lock(object);
10264
10265 while (range) {
10266 m = vm_page_lookup(
10267 object, offset);
10268
10269 if (m && !m->fictitious) {
10270 ret =
10271 pmap_attribute_cache_sync(
10272 m->phys_page,
10273 PAGE_SIZE,
10274 attribute, value);
10275
10276 } else if (object->shadow) {
10277 offset = offset + object->vo_shadow_offset;
10278 last_object = object;
10279 object = object->shadow;
10280 vm_object_lock(last_object->shadow);
10281 vm_object_unlock(last_object);
10282 continue;
10283 }
10284 range -= PAGE_SIZE;
10285
10286 if (base_object != object) {
10287 vm_object_unlock(object);
10288 vm_object_lock(base_object);
10289 object = base_object;
10290 }
10291 /* Bump to the next page */
10292 base_offset += PAGE_SIZE;
10293 offset = base_offset;
10294 }
10295 vm_object_unlock(object);
10296 }
10297 }
10298 start += sub_size;
10299 } else {
10300 vm_map_unlock(map);
10301 return KERN_FAILURE;
10302 }
10303
10304 }
10305
10306 vm_map_unlock(map);
10307
10308 return ret;
10309 }
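/*
 * Minimal usage sketch: request a cache flush over a range.  Names
 * prefixed "example_" are hypothetical; MATTR_VAL_CACHE_FLUSH is assumed
 * to be the flush value defined in <mach/vm_attributes.h>.  MATTR_CACHE
 * goes through the per-page walk above; other attributes are passed
 * straight to pmap_attribute().
 */
#if 0	/* illustrative sketch only */
static kern_return_t
example_flush_range(vm_map_t example_map,
                    vm_map_offset_t example_start,
                    vm_map_offset_t example_end)
{
        vm_machine_attribute_val_t example_val = MATTR_VAL_CACHE_FLUSH;

        return vm_map_machine_attribute(example_map, example_start,
                                        example_end, MATTR_CACHE,
                                        &example_val);
}
#endif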
10310
10311 /*
10312 * vm_map_behavior_set:
10313 *
10314 * Sets the paging reference behavior of the specified address
10315 * range in the target map. Paging reference behavior affects
10316 * how pagein operations resulting from faults on the map will be
10317 * clustered.
10318 */
10319 kern_return_t
10320 vm_map_behavior_set(
10321 vm_map_t map,
10322 vm_map_offset_t start,
10323 vm_map_offset_t end,
10324 vm_behavior_t new_behavior)
10325 {
10326 register vm_map_entry_t entry;
10327 vm_map_entry_t temp_entry;
10328
10329 XPR(XPR_VM_MAP,
10330 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
10331 map, start, end, new_behavior, 0);
10332
10333 if (start > end ||
10334 start < vm_map_min(map) ||
10335 end > vm_map_max(map)) {
10336 return KERN_NO_SPACE;
10337 }
10338
10339 switch (new_behavior) {
10340
10341 /*
10342 * This first block of behaviors all set a persistent state on the specified
10343 * memory range. All we have to do here is to record the desired behavior
10344 * in the vm_map_entry_t's.
10345 */
10346
10347 case VM_BEHAVIOR_DEFAULT:
10348 case VM_BEHAVIOR_RANDOM:
10349 case VM_BEHAVIOR_SEQUENTIAL:
10350 case VM_BEHAVIOR_RSEQNTL:
10351 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
10352 vm_map_lock(map);
10353
10354 /*
10355 * The entire address range must be valid for the map.
10356 * Note that vm_map_range_check() does a
10357 * vm_map_lookup_entry() internally and returns the
10358 * entry containing the start of the address range if
10359 * the entire range is valid.
10360 */
10361 if (vm_map_range_check(map, start, end, &temp_entry)) {
10362 entry = temp_entry;
10363 vm_map_clip_start(map, entry, start);
10364 }
10365 else {
10366 vm_map_unlock(map);
10367 return(KERN_INVALID_ADDRESS);
10368 }
10369
10370 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
10371 vm_map_clip_end(map, entry, end);
10372 assert(!entry->use_pmap);
10373
10374 if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
10375 entry->zero_wired_pages = TRUE;
10376 } else {
10377 entry->behavior = new_behavior;
10378 }
10379 entry = entry->vme_next;
10380 }
10381
10382 vm_map_unlock(map);
10383 break;
10384
10385 /*
10386 * The rest of these are different from the above in that they cause
10387 * an immediate action to take place as opposed to setting a behavior that
10388 * affects future actions.
10389 */
10390
10391 case VM_BEHAVIOR_WILLNEED:
10392 return vm_map_willneed(map, start, end);
10393
10394 case VM_BEHAVIOR_DONTNEED:
10395 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
10396
10397 case VM_BEHAVIOR_FREE:
10398 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
10399
10400 case VM_BEHAVIOR_REUSABLE:
10401 return vm_map_reusable_pages(map, start, end);
10402
10403 case VM_BEHAVIOR_REUSE:
10404 return vm_map_reuse_pages(map, start, end);
10405
10406 case VM_BEHAVIOR_CAN_REUSE:
10407 return vm_map_can_reuse(map, start, end);
10408
10409 default:
10410 return(KERN_INVALID_ARGUMENT);
10411 }
10412
10413 return(KERN_SUCCESS);
10414 }
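/*
 * Minimal usage sketch: tag a range so that future faults cluster their
 * pageins sequentially.  Names prefixed "example_" are hypothetical.
 */
#if 0	/* illustrative sketch only */
static kern_return_t
example_set_sequential(vm_map_t example_map,
                       vm_map_offset_t example_start,
                       vm_map_offset_t example_end)
{
        /* persistent behavior: recorded in the map entries for later faults */
        return vm_map_behavior_set(example_map, example_start, example_end,
                                   VM_BEHAVIOR_SEQUENTIAL);
}
#endif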
10415
10416
10417 /*
10418 * Internals for madvise(MADV_WILLNEED) system call.
10419 *
10420 * The present implementation is to do a read-ahead if the mapping corresponds
10421 * to a mapped regular file. If it's an anonymous mapping, then we do nothing
10422 * and basically ignore the "advice" (which we are always free to do).
10423 */
10424
10425
10426 static kern_return_t
10427 vm_map_willneed(
10428 vm_map_t map,
10429 vm_map_offset_t start,
10430 vm_map_offset_t end
10431 )
10432 {
10433 vm_map_entry_t entry;
10434 vm_object_t object;
10435 memory_object_t pager;
10436 struct vm_object_fault_info fault_info;
10437 kern_return_t kr;
10438 vm_object_size_t len;
10439 vm_object_offset_t offset;
10440
10441 /*
10442 * Fill in static values in fault_info. Several fields get ignored by the code
10443 * we call, but we'll fill them in anyway since uninitialized fields are bad
10444 * when it comes to future backwards compatibility.
10445 */
10446
10447 fault_info.interruptible = THREAD_UNINT; /* ignored value */
10448 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
10449 fault_info.no_cache = FALSE; /* ignored value */
10450 fault_info.stealth = TRUE;
10451 fault_info.io_sync = FALSE;
10452 fault_info.cs_bypass = FALSE;
10453 fault_info.mark_zf_absent = FALSE;
10454
10455 /*
10456 * The MADV_WILLNEED operation doesn't require any changes to the
10457 * vm_map_entry_t's, so the read lock is sufficient.
10458 */
10459
10460 vm_map_lock_read(map);
10461
10462 /*
10463 * The madvise semantics require that the address range be fully
10464 * allocated with no holes. Otherwise, we're required to return
10465 * an error.
10466 */
10467
10468 if (! vm_map_range_check(map, start, end, &entry)) {
10469 vm_map_unlock_read(map);
10470 return KERN_INVALID_ADDRESS;
10471 }
10472
10473 /*
10474 * Examine each vm_map_entry_t in the range.
10475 */
10476 for (; entry != vm_map_to_entry(map) && start < end; ) {
10477
10478 /*
10479 * The first time through, the start address could be anywhere
10480 * within the vm_map_entry we found. So adjust the offset to
10481 * correspond. After that, the offset will always be zero to
10482 * correspond to the beginning of the current vm_map_entry.
10483 */
10484 offset = (start - entry->vme_start) + entry->offset;
10485
10486 /*
10487 * Set the length so we don't go beyond the end of the
10488 * map_entry or beyond the end of the range we were given.
10489 * This range could also span multiple map entries, all of which
10490 * map different files, so make sure we only do the right amount
10491 * of I/O for each object. Note that it's possible for there
10492 * to be multiple map entries all referring to the same object
10493 * but with different page permissions, but it's not worth
10494 * trying to optimize that case.
10495 */
10496 len = MIN(entry->vme_end - start, end - start);
10497
10498 if ((vm_size_t) len != len) {
10499 /* 32-bit overflow */
10500 len = (vm_size_t) (0 - PAGE_SIZE);
10501 }
10502 fault_info.cluster_size = (vm_size_t) len;
10503 fault_info.lo_offset = offset;
10504 fault_info.hi_offset = offset + len;
10505 fault_info.user_tag = entry->alias;
10506
10507 /*
10508 * If there's no read permission to this mapping, then just
10509 * skip it.
10510 */
10511 if ((entry->protection & VM_PROT_READ) == 0) {
10512 entry = entry->vme_next;
10513 start = entry->vme_start;
10514 continue;
10515 }
10516
10517 /*
10518 * Find the file object backing this map entry. If there is
10519 * none, then we simply ignore the "will need" advice for this
10520 * entry and go on to the next one.
10521 */
10522 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
10523 entry = entry->vme_next;
10524 start = entry->vme_start;
10525 continue;
10526 }
10527
10528 /*
10529 * The data_request() could take a long time, so let's
10530 * release the map lock to avoid blocking other threads.
10531 */
10532 vm_map_unlock_read(map);
10533
10534 vm_object_paging_begin(object);
10535 pager = object->pager;
10536 vm_object_unlock(object);
10537
10538 /*
10539 * Get the data from the object asynchronously.
10540 *
10541 * Note that memory_object_data_request() places limits on the
10542 * amount of I/O it will do. Regardless of the len we
10543 * specified, it won't do more than MAX_UPL_TRANSFER and it
10544 * silently truncates the len to that size. This isn't
10545 * necessarily bad since madvise shouldn't really be used to
10546 * page in unlimited amounts of data. Other Unix variants
10547 * limit the willneed case as well. If this turns out to be an
10548 * issue for developers, then we can always adjust the policy
10549 * here and still be backwards compatible since this is all
10550 * just "advice".
10551 */
10552 kr = memory_object_data_request(
10553 pager,
10554 offset + object->paging_offset,
10555 0, /* ignored */
10556 VM_PROT_READ,
10557 (memory_object_fault_info_t)&fault_info);
10558
10559 vm_object_lock(object);
10560 vm_object_paging_end(object);
10561 vm_object_unlock(object);
10562
10563 /*
10564 * If we couldn't do the I/O for some reason, just give up on
10565 * the madvise. We still return success to the user since
10566 * madvise isn't supposed to fail when the advice can't be
10567 * taken.
10568 */
10569 if (kr != KERN_SUCCESS) {
10570 return KERN_SUCCESS;
10571 }
10572
10573 start += len;
10574 if (start >= end) {
10575 /* done */
10576 return KERN_SUCCESS;
10577 }
10578
10579 /* look up next entry */
10580 vm_map_lock_read(map);
10581 if (! vm_map_lookup_entry(map, start, &entry)) {
10582 /*
10583 * There's a new hole in the address range.
10584 */
10585 vm_map_unlock_read(map);
10586 return KERN_INVALID_ADDRESS;
10587 }
10588 }
10589
10590 vm_map_unlock_read(map);
10591 return KERN_SUCCESS;
10592 }
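/*
 * Minimal usage sketch: madvise(MADV_WILLNEED) reaches the routine above
 * through vm_map_behavior_set() with VM_BEHAVIOR_WILLNEED.  Names
 * prefixed "example_" are hypothetical.
 */
#if 0	/* illustrative sketch only */
static kern_return_t
example_willneed(vm_map_offset_t example_start, vm_map_offset_t example_end)
{
        /* immediate action: kicks off the asynchronous read-ahead */
        return vm_map_behavior_set(current_map(), example_start, example_end,
                                   VM_BEHAVIOR_WILLNEED);
}
#endif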
10593
10594 static boolean_t
10595 vm_map_entry_is_reusable(
10596 vm_map_entry_t entry)
10597 {
10598 vm_object_t object;
10599
10600 if (entry->is_shared ||
10601 entry->is_sub_map ||
10602 entry->in_transition ||
10603 entry->protection != VM_PROT_DEFAULT ||
10604 entry->max_protection != VM_PROT_ALL ||
10605 entry->inheritance != VM_INHERIT_DEFAULT ||
10606 entry->no_cache ||
10607 entry->permanent ||
10608 entry->superpage_size != 0 ||
10609 entry->zero_wired_pages ||
10610 entry->wired_count != 0 ||
10611 entry->user_wired_count != 0) {
10612 return FALSE;
10613 }
10614
10615 object = entry->object.vm_object;
10616 if (object == VM_OBJECT_NULL) {
10617 return TRUE;
10618 }
10619 if (object->ref_count == 1 &&
10620 object->wired_page_count == 0 &&
10621 object->copy == VM_OBJECT_NULL &&
10622 object->shadow == VM_OBJECT_NULL &&
10623 object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
10624 object->internal &&
10625 !object->true_share &&
10626 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
10627 !object->code_signed) {
10628 return TRUE;
10629 }
10630 return FALSE;
10631
10632
10633 }
10634
10635 static kern_return_t
10636 vm_map_reuse_pages(
10637 vm_map_t map,
10638 vm_map_offset_t start,
10639 vm_map_offset_t end)
10640 {
10641 vm_map_entry_t entry;
10642 vm_object_t object;
10643 vm_object_offset_t start_offset, end_offset;
10644
10645 /*
10646 * The MADV_REUSE operation doesn't require any changes to the
10647 * vm_map_entry_t's, so the read lock is sufficient.
10648 */
10649
10650 vm_map_lock_read(map);
10651
10652 /*
10653 * The madvise semantics require that the address range be fully
10654 * allocated with no holes. Otherwise, we're required to return
10655 * an error.
10656 */
10657
10658 if (!vm_map_range_check(map, start, end, &entry)) {
10659 vm_map_unlock_read(map);
10660 vm_page_stats_reusable.reuse_pages_failure++;
10661 return KERN_INVALID_ADDRESS;
10662 }
10663
10664 /*
10665 * Examine each vm_map_entry_t in the range.
10666 */
10667 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10668 entry = entry->vme_next) {
10669 /*
10670 * Sanity check on the VM map entry.
10671 */
10672 if (! vm_map_entry_is_reusable(entry)) {
10673 vm_map_unlock_read(map);
10674 vm_page_stats_reusable.reuse_pages_failure++;
10675 return KERN_INVALID_ADDRESS;
10676 }
10677
10678 /*
10679 * The first time through, the start address could be anywhere
10680 * within the vm_map_entry we found. So adjust the offset to
10681 * correspond.
10682 */
10683 if (entry->vme_start < start) {
10684 start_offset = start - entry->vme_start;
10685 } else {
10686 start_offset = 0;
10687 }
10688 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
10689 start_offset += entry->offset;
10690 end_offset += entry->offset;
10691
10692 object = entry->object.vm_object;
10693 if (object != VM_OBJECT_NULL) {
10694 vm_object_lock(object);
10695 vm_object_reuse_pages(object, start_offset, end_offset,
10696 TRUE);
10697 vm_object_unlock(object);
10698 }
10699
10700 if (entry->alias == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
10701 /*
10702 * XXX
10703 * We do not hold the VM map exclusively here.
10704 * The "alias" field is not that critical, so it's
10705 * safe to update it here, as long as it is the only
10706 * one that can be modified while holding the VM map
10707 * "shared".
10708 */
10709 entry->alias = VM_MEMORY_MALLOC_LARGE_REUSED;
10710 }
10711 }
10712
10713 vm_map_unlock_read(map);
10714 vm_page_stats_reusable.reuse_pages_success++;
10715 return KERN_SUCCESS;
10716 }
10717
10718
10719 static kern_return_t
10720 vm_map_reusable_pages(
10721 vm_map_t map,
10722 vm_map_offset_t start,
10723 vm_map_offset_t end)
10724 {
10725 vm_map_entry_t entry;
10726 vm_object_t object;
10727 vm_object_offset_t start_offset, end_offset;
10728
10729 /*
10730 * The MADV_REUSABLE operation doesn't require any changes to the
10731 * vm_map_entry_t's, so the read lock is sufficient.
10732 */
10733
10734 vm_map_lock_read(map);
10735
10736 /*
10737 * The madvise semantics require that the address range be fully
10738 * allocated with no holes. Otherwise, we're required to return
10739 * an error.
10740 */
10741
10742 if (!vm_map_range_check(map, start, end, &entry)) {
10743 vm_map_unlock_read(map);
10744 vm_page_stats_reusable.reusable_pages_failure++;
10745 return KERN_INVALID_ADDRESS;
10746 }
10747
10748 /*
10749 * Examine each vm_map_entry_t in the range.
10750 */
10751 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10752 entry = entry->vme_next) {
10753 int kill_pages = 0;
10754
10755 /*
10756 * Sanity check on the VM map entry.
10757 */
10758 if (! vm_map_entry_is_reusable(entry)) {
10759 vm_map_unlock_read(map);
10760 vm_page_stats_reusable.reusable_pages_failure++;
10761 return KERN_INVALID_ADDRESS;
10762 }
10763
10764 /*
10765 * The first time through, the start address could be anywhere
10766 * within the vm_map_entry we found. So adjust the offset to
10767 * correspond.
10768 */
10769 if (entry->vme_start < start) {
10770 start_offset = start - entry->vme_start;
10771 } else {
10772 start_offset = 0;
10773 }
10774 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
10775 start_offset += entry->offset;
10776 end_offset += entry->offset;
10777
10778 object = entry->object.vm_object;
10779 if (object == VM_OBJECT_NULL)
10780 continue;
10781
10782
10783 vm_object_lock(object);
10784 if (object->ref_count == 1 && !object->shadow)
10785 kill_pages = 1;
10786 else
10787 kill_pages = -1;
10788 if (kill_pages != -1) {
10789 vm_object_deactivate_pages(object,
10790 start_offset,
10791 end_offset - start_offset,
10792 kill_pages,
10793 TRUE /*reusable_pages*/);
10794 } else {
10795 vm_page_stats_reusable.reusable_pages_shared++;
10796 }
10797 vm_object_unlock(object);
10798
10799 if (entry->alias == VM_MEMORY_MALLOC_LARGE ||
10800 entry->alias == VM_MEMORY_MALLOC_LARGE_REUSED) {
10801 /*
10802 * XXX
10803 * We do not hold the VM map exclusively here.
10804 * The "alias" field is not that critical, so it's
10805 * safe to update it here, as long as it is the only
10806 * one that can be modified while holding the VM map
10807 * "shared".
10808 */
10809 entry->alias = VM_MEMORY_MALLOC_LARGE_REUSABLE;
10810 }
10811 }
10812
10813 vm_map_unlock_read(map);
10814 vm_page_stats_reusable.reusable_pages_success++;
10815 return KERN_SUCCESS;
10816 }
10817
10818
10819 static kern_return_t
10820 vm_map_can_reuse(
10821 vm_map_t map,
10822 vm_map_offset_t start,
10823 vm_map_offset_t end)
10824 {
10825 vm_map_entry_t entry;
10826
10827 /*
10828 * The MADV_REUSABLE operation doesn't require any changes to the
10829 * vm_map_entry_t's, so the read lock is sufficient.
10830 */
10831
10832 vm_map_lock_read(map);
10833
10834 /*
10835 * The madvise semantics require that the address range be fully
10836 * allocated with no holes. Otherwise, we're required to return
10837 * an error.
10838 */
10839
10840 if (!vm_map_range_check(map, start, end, &entry)) {
10841 vm_map_unlock_read(map);
10842 vm_page_stats_reusable.can_reuse_failure++;
10843 return KERN_INVALID_ADDRESS;
10844 }
10845
10846 /*
10847 * Examine each vm_map_entry_t in the range.
10848 */
10849 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10850 entry = entry->vme_next) {
10851 /*
10852 * Sanity check on the VM map entry.
10853 */
10854 if (! vm_map_entry_is_reusable(entry)) {
10855 vm_map_unlock_read(map);
10856 vm_page_stats_reusable.can_reuse_failure++;
10857 return KERN_INVALID_ADDRESS;
10858 }
10859 }
10860
10861 vm_map_unlock_read(map);
10862 vm_page_stats_reusable.can_reuse_success++;
10863 return KERN_SUCCESS;
10864 }
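/*
 * Minimal usage sketch: the reusable/reuse pairing as a malloc-style
 * allocator might use it.  Names prefixed "example_" are hypothetical.
 */
#if 0	/* illustrative sketch only */
static void
example_reuse_cycle(vm_map_t example_map,
                    vm_map_offset_t example_start,
                    vm_map_offset_t example_end)
{
        /* the range is idle: let its pages be reclaimed cheaply */
        (void) vm_map_behavior_set(example_map, example_start, example_end,
                                   VM_BEHAVIOR_REUSABLE);

        /* ... later, just before handing the range out again ... */
        (void) vm_map_behavior_set(example_map, example_start, example_end,
                                   VM_BEHAVIOR_REUSE);
}
#endif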
10865
10866
10867
10868 #include <mach_kdb.h>
10869 #if MACH_KDB
10870 #include <ddb/db_output.h>
10871 #include <vm/vm_print.h>
10872
10873 #define printf db_printf
10874
10875 /*
10876 * Forward declarations for internal functions.
10877 */
10878 extern void vm_map_links_print(
10879 struct vm_map_links *links);
10880
10881 extern void vm_map_header_print(
10882 struct vm_map_header *header);
10883
10884 extern void vm_map_entry_print(
10885 vm_map_entry_t entry);
10886
10887 extern void vm_follow_entry(
10888 vm_map_entry_t entry);
10889
10890 extern void vm_follow_map(
10891 vm_map_t map);
10892
10893 /*
10894 * vm_map_links_print: [ debug ]
10895 */
10896 void
10897 vm_map_links_print(
10898 struct vm_map_links *links)
10899 {
10900 iprintf("prev = %08X next = %08X start = %016llX end = %016llX\n",
10901 links->prev,
10902 links->next,
10903 (unsigned long long)links->start,
10904 (unsigned long long)links->end);
10905 }
10906
10907 /*
10908 * vm_map_header_print: [ debug ]
10909 */
10910 void
10911 vm_map_header_print(
10912 struct vm_map_header *header)
10913 {
10914 vm_map_links_print(&header->links);
10915 iprintf("nentries = %08X, %sentries_pageable\n",
10916 header->nentries,
10917 (header->entries_pageable ? "" : "!"));
10918 }
10919
10920 /*
10921 * vm_follow_entry: [ debug ]
10922 */
10923 void
10924 vm_follow_entry(
10925 vm_map_entry_t entry)
10926 {
10927 int shadows;
10928
10929 iprintf("map entry %08X\n", entry);
10930
10931 db_indent += 2;
10932
10933 shadows = vm_follow_object(entry->object.vm_object);
10934 iprintf("Total objects : %d\n",shadows);
10935
10936 db_indent -= 2;
10937 }
10938
10939 /*
10940 * vm_map_entry_print: [ debug ]
10941 */
10942 void
10943 vm_map_entry_print(
10944 register vm_map_entry_t entry)
10945 {
10946 static const char *inheritance_name[4] =
10947 { "share", "copy", "none", "?"};
10948 static const char *behavior_name[4] =
10949 { "dflt", "rand", "seqtl", "rseqntl" };
10950
10951 iprintf("map entry %08X - prev = %08X next = %08X\n", entry, entry->vme_prev, entry->vme_next);
10952
10953 db_indent += 2;
10954
10955 vm_map_links_print(&entry->links);
10956
10957 iprintf("start = %016llX end = %016llX - prot=%x/%x/%s\n",
10958 (unsigned long long)entry->vme_start,
10959 (unsigned long long)entry->vme_end,
10960 entry->protection,
10961 entry->max_protection,
10962 inheritance_name[(entry->inheritance & 0x3)]);
10963
10964 iprintf("behavior = %s, wired_count = %d, user_wired_count = %d\n",
10965 behavior_name[(entry->behavior & 0x3)],
10966 entry->wired_count,
10967 entry->user_wired_count);
10968 iprintf("%sin_transition, %sneeds_wakeup\n",
10969 (entry->in_transition ? "" : "!"),
10970 (entry->needs_wakeup ? "" : "!"));
10971
10972 if (entry->is_sub_map) {
10973 iprintf("submap = %08X - offset = %016llX\n",
10974 entry->object.sub_map,
10975 (unsigned long long)entry->offset);
10976 } else {
10977 iprintf("object = %08X offset = %016llX - ",
10978 entry->object.vm_object,
10979 (unsigned long long)entry->offset);
10980 printf("%sis_shared, %sneeds_copy\n",
10981 (entry->is_shared ? "" : "!"),
10982 (entry->needs_copy ? "" : "!"));
10983 }
10984
10985 db_indent -= 2;
10986 }
10987
10988 /*
10989 * vm_follow_map: [ debug ]
10990 */
10991 void
10992 vm_follow_map(
10993 vm_map_t map)
10994 {
10995 register vm_map_entry_t entry;
10996
10997 iprintf("task map %08X\n", map);
10998
10999 db_indent += 2;
11000
11001 for (entry = vm_map_first_entry(map);
11002 entry && entry != vm_map_to_entry(map);
11003 entry = entry->vme_next) {
11004 vm_follow_entry(entry);
11005 }
11006
11007 db_indent -= 2;
11008 }
11009
11010 /*
11011 * vm_map_print: [ debug ]
11012 */
11013 void
11014 vm_map_print(
11015 db_addr_t inmap)
11016 {
11017 register vm_map_entry_t entry;
11018 vm_map_t map;
11019 #if TASK_SWAPPER
11020 char *swstate;
11021 #endif /* TASK_SWAPPER */
11022
11023 map = (vm_map_t)(long)
11024 inmap; /* Make sure we have the right type */
11025
11026 iprintf("task map %08X\n", map);
11027
11028 db_indent += 2;
11029
11030 vm_map_header_print(&map->hdr);
11031
11032 iprintf("pmap = %08X size = %08X ref = %d hint = %08X first_free = %08X\n",
11033 map->pmap,
11034 map->size,
11035 map->ref_count,
11036 map->hint,
11037 map->first_free);
11038
11039 iprintf("%swait_for_space, %swiring_required, timestamp = %d\n",
11040 (map->wait_for_space ? "" : "!"),
11041 (map->wiring_required ? "" : "!"),
11042 map->timestamp);
11043
11044 #if TASK_SWAPPER
11045 switch (map->sw_state) {
11046 case MAP_SW_IN:
11047 swstate = "SW_IN";
11048 break;
11049 case MAP_SW_OUT:
11050 swstate = "SW_OUT";
11051 break;
11052 default:
11053 swstate = "????";
11054 break;
11055 }
11056 iprintf("res = %d, sw_state = %s\n", map->res_count, swstate);
11057 #endif /* TASK_SWAPPER */
11058
11059 for (entry = vm_map_first_entry(map);
11060 entry && entry != vm_map_to_entry(map);
11061 entry = entry->vme_next) {
11062 vm_map_entry_print(entry);
11063 }
11064
11065 db_indent -= 2;
11066 }
11067
11068 /*
11069 * Routine: vm_map_copy_print
11070 * Purpose:
11071 * Pretty-print a copy object for ddb.
11072 */
11073
11074 void
11075 vm_map_copy_print(
11076 db_addr_t incopy)
11077 {
11078 vm_map_copy_t copy;
11079 vm_map_entry_t entry;
11080
11081 copy = (vm_map_copy_t)(long)
11082 incopy; /* Make sure we have the right type */
11083
11084 printf("copy object 0x%x\n", copy);
11085
11086 db_indent += 2;
11087
11088 iprintf("type=%d", copy->type);
11089 switch (copy->type) {
11090 case VM_MAP_COPY_ENTRY_LIST:
11091 printf("[entry_list]");
11092 break;
11093
11094 case VM_MAP_COPY_OBJECT:
11095 printf("[object]");
11096 break;
11097
11098 case VM_MAP_COPY_KERNEL_BUFFER:
11099 printf("[kernel_buffer]");
11100 break;
11101
11102 default:
11103 printf("[bad type]");
11104 break;
11105 }
11106 printf(", offset=0x%llx", (unsigned long long)copy->offset);
11107 printf(", size=0x%x\n", copy->size);
11108
11109 switch (copy->type) {
11110 case VM_MAP_COPY_ENTRY_LIST:
11111 vm_map_header_print(&copy->cpy_hdr);
11112 for (entry = vm_map_copy_first_entry(copy);
11113 entry && entry != vm_map_copy_to_entry(copy);
11114 entry = entry->vme_next) {
11115 vm_map_entry_print(entry);
11116 }
11117 break;
11118
11119 case VM_MAP_COPY_OBJECT:
11120 iprintf("object=0x%x\n", copy->cpy_object);
11121 break;
11122
11123 case VM_MAP_COPY_KERNEL_BUFFER:
11124 iprintf("kernel buffer=0x%x", copy->cpy_kdata);
11125 printf(", kalloc_size=0x%x\n", copy->cpy_kalloc_size);
11126 break;
11127
11128 }
11129
11130 db_indent -=2;
11131 }
11132
11133 /*
11134 * db_vm_map_total_size(map) [ debug ]
11135 *
11136 * return the total virtual size (in bytes) of the map
11137 */
11138 vm_map_size_t
11139 db_vm_map_total_size(
11140 db_addr_t inmap)
11141 {
11142 vm_map_entry_t entry;
11143 vm_map_size_t total;
11144 vm_map_t map;
11145
11146 map = (vm_map_t)(long)
11147 inmap; /* Make sure we have the right type */
11148
11149 total = 0;
11150 for (entry = vm_map_first_entry(map);
11151 entry != vm_map_to_entry(map);
11152 entry = entry->vme_next) {
11153 total += entry->vme_end - entry->vme_start;
11154 }
11155
11156 return total;
11157 }
11158
11159 #endif /* MACH_KDB */
11160
11161 /*
11162 * Routine: vm_map_entry_insert
11163 *
11164 * Description: This routine inserts a new vm_map_entry in a locked map.
11165 */
11166 vm_map_entry_t
11167 vm_map_entry_insert(
11168 vm_map_t map,
11169 vm_map_entry_t insp_entry,
11170 vm_map_offset_t start,
11171 vm_map_offset_t end,
11172 vm_object_t object,
11173 vm_object_offset_t offset,
11174 boolean_t needs_copy,
11175 boolean_t is_shared,
11176 boolean_t in_transition,
11177 vm_prot_t cur_protection,
11178 vm_prot_t max_protection,
11179 vm_behavior_t behavior,
11180 vm_inherit_t inheritance,
11181 unsigned wired_count,
11182 boolean_t no_cache,
11183 boolean_t permanent,
11184 unsigned int superpage_size)
11185 {
11186 vm_map_entry_t new_entry;
11187
11188 assert(insp_entry != (vm_map_entry_t)0);
11189
11190 new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
11191
11192 new_entry->vme_start = start;
11193 new_entry->vme_end = end;
11194 assert(page_aligned(new_entry->vme_start));
11195 assert(page_aligned(new_entry->vme_end));
11196 assert(new_entry->vme_start < new_entry->vme_end);
11197
11198 new_entry->object.vm_object = object;
11199 new_entry->offset = offset;
11200 new_entry->is_shared = is_shared;
11201 new_entry->is_sub_map = FALSE;
11202 new_entry->needs_copy = needs_copy;
11203 new_entry->in_transition = in_transition;
11204 new_entry->needs_wakeup = FALSE;
11205 new_entry->inheritance = inheritance;
11206 new_entry->protection = cur_protection;
11207 new_entry->max_protection = max_protection;
11208 new_entry->behavior = behavior;
11209 new_entry->wired_count = wired_count;
11210 new_entry->user_wired_count = 0;
11211 new_entry->use_pmap = FALSE;
11212 new_entry->alias = 0;
11213 new_entry->zero_wired_pages = FALSE;
11214 new_entry->no_cache = no_cache;
11215 new_entry->permanent = permanent;
11216 new_entry->superpage_size = superpage_size;
11217 new_entry->used_for_jit = FALSE;
11218
11219 /*
11220 * Insert the new entry into the list.
11221 */
11222
11223 vm_map_store_entry_link(map, insp_entry, new_entry);
11224 map->size += end - start;
11225
11226 /*
11227 * Update the free space hint and the lookup hint.
11228 */
11229
11230 SAVE_HINT_MAP_WRITE(map, new_entry);
11231 return new_entry;
11232 }
11233
11234 /*
11235 * Routine: vm_map_remap_extract
11236 *
11237 * Description: This routine returns a vm_map_entry list from a map.
11238 */
11239 static kern_return_t
11240 vm_map_remap_extract(
11241 vm_map_t map,
11242 vm_map_offset_t addr,
11243 vm_map_size_t size,
11244 boolean_t copy,
11245 struct vm_map_header *map_header,
11246 vm_prot_t *cur_protection,
11247 vm_prot_t *max_protection,
11248 /* What, no behavior? */
11249 vm_inherit_t inheritance,
11250 boolean_t pageable)
11251 {
11252 kern_return_t result;
11253 vm_map_size_t mapped_size;
11254 vm_map_size_t tmp_size;
11255 vm_map_entry_t src_entry; /* result of last map lookup */
11256 vm_map_entry_t new_entry;
11257 vm_object_offset_t offset;
11258 vm_map_offset_t map_address;
11259 vm_map_offset_t src_start; /* start of entry to map */
11260 vm_map_offset_t src_end; /* end of region to be mapped */
11261 vm_object_t object;
11262 vm_map_version_t version;
11263 boolean_t src_needs_copy;
11264 boolean_t new_entry_needs_copy;
11265
11266 assert(map != VM_MAP_NULL);
11267 assert(size != 0 && size == vm_map_round_page(size));
11268 assert(inheritance == VM_INHERIT_NONE ||
11269 inheritance == VM_INHERIT_COPY ||
11270 inheritance == VM_INHERIT_SHARE);
11271
11272 /*
11273 * Compute start and end of region.
11274 */
11275 src_start = vm_map_trunc_page(addr);
11276 src_end = vm_map_round_page(src_start + size);
11277
11278 /*
11279 * Initialize map_header.
11280 */
11281 map_header->links.next = (struct vm_map_entry *)&map_header->links;
11282 map_header->links.prev = (struct vm_map_entry *)&map_header->links;
11283 map_header->nentries = 0;
11284 map_header->entries_pageable = pageable;
11285
11286 vm_map_store_init( map_header );
11287
11288 *cur_protection = VM_PROT_ALL;
11289 *max_protection = VM_PROT_ALL;
11290
11291 map_address = 0;
11292 mapped_size = 0;
11293 result = KERN_SUCCESS;
11294
11295 /*
11296 * The specified source virtual space might correspond to
11297 * multiple map entries, need to loop on them.
11298 */
11299 vm_map_lock(map);
11300 while (mapped_size != size) {
11301 vm_map_size_t entry_size;
11302
11303 /*
11304 * Find the beginning of the region.
11305 */
11306 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
11307 result = KERN_INVALID_ADDRESS;
11308 break;
11309 }
11310
11311 if (src_start < src_entry->vme_start ||
11312 (mapped_size && src_start != src_entry->vme_start)) {
11313 result = KERN_INVALID_ADDRESS;
11314 break;
11315 }
11316
11317 tmp_size = size - mapped_size;
11318 if (src_end > src_entry->vme_end)
11319 tmp_size -= (src_end - src_entry->vme_end);
11320
11321 entry_size = (vm_map_size_t)(src_entry->vme_end -
11322 src_entry->vme_start);
11323
11324 if(src_entry->is_sub_map) {
11325 vm_map_reference(src_entry->object.sub_map);
11326 object = VM_OBJECT_NULL;
11327 } else {
11328 object = src_entry->object.vm_object;
11329
11330 if (object == VM_OBJECT_NULL) {
11331 object = vm_object_allocate(entry_size);
11332 src_entry->offset = 0;
11333 src_entry->object.vm_object = object;
11334 } else if (object->copy_strategy !=
11335 MEMORY_OBJECT_COPY_SYMMETRIC) {
11336 /*
11337 * We are already using an asymmetric
11338 * copy, and therefore we already have
11339 * the right object.
11340 */
11341 assert(!src_entry->needs_copy);
11342 } else if (src_entry->needs_copy || object->shadowed ||
11343 (object->internal && !object->true_share &&
11344 !src_entry->is_shared &&
11345 object->vo_size > entry_size)) {
11346
11347 vm_object_shadow(&src_entry->object.vm_object,
11348 &src_entry->offset,
11349 entry_size);
11350
11351 if (!src_entry->needs_copy &&
11352 (src_entry->protection & VM_PROT_WRITE)) {
11353 vm_prot_t prot;
11354
11355 prot = src_entry->protection & ~VM_PROT_WRITE;
11356
11357 if (override_nx(map, src_entry->alias) && prot)
11358 prot |= VM_PROT_EXECUTE;
11359
11360 if(map->mapped) {
11361 vm_object_pmap_protect(
11362 src_entry->object.vm_object,
11363 src_entry->offset,
11364 entry_size,
11365 PMAP_NULL,
11366 src_entry->vme_start,
11367 prot);
11368 } else {
11369 pmap_protect(vm_map_pmap(map),
11370 src_entry->vme_start,
11371 src_entry->vme_end,
11372 prot);
11373 }
11374 }
11375
11376 object = src_entry->object.vm_object;
11377 src_entry->needs_copy = FALSE;
11378 }
11379
11380
11381 vm_object_lock(object);
11382 vm_object_reference_locked(object); /* object ref. for new entry */
11383 if (object->copy_strategy ==
11384 MEMORY_OBJECT_COPY_SYMMETRIC) {
11385 object->copy_strategy =
11386 MEMORY_OBJECT_COPY_DELAY;
11387 }
11388 vm_object_unlock(object);
11389 }
11390
11391 offset = src_entry->offset + (src_start - src_entry->vme_start);
11392
11393 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
11394 vm_map_entry_copy(new_entry, src_entry);
11395 new_entry->use_pmap = FALSE; /* clr address space specifics */
11396
11397 new_entry->vme_start = map_address;
11398 new_entry->vme_end = map_address + tmp_size;
11399 assert(new_entry->vme_start < new_entry->vme_end);
11400 new_entry->inheritance = inheritance;
11401 new_entry->offset = offset;
11402
11403 /*
11404 * The new region has to be copied now if required.
11405 */
11406 RestartCopy:
11407 if (!copy) {
11408 src_entry->is_shared = TRUE;
11409 new_entry->is_shared = TRUE;
11410 if (!(new_entry->is_sub_map))
11411 new_entry->needs_copy = FALSE;
11412
11413 } else if (src_entry->is_sub_map) {
11414 /* make this a COW sub_map if not already */
11415 new_entry->needs_copy = TRUE;
11416 object = VM_OBJECT_NULL;
11417 } else if (src_entry->wired_count == 0 &&
11418 vm_object_copy_quickly(&new_entry->object.vm_object,
11419 new_entry->offset,
11420 (new_entry->vme_end -
11421 new_entry->vme_start),
11422 &src_needs_copy,
11423 &new_entry_needs_copy)) {
11424
11425 new_entry->needs_copy = new_entry_needs_copy;
11426 new_entry->is_shared = FALSE;
11427
11428 /*
11429 * Handle copy_on_write semantics.
11430 */
11431 if (src_needs_copy && !src_entry->needs_copy) {
11432 vm_prot_t prot;
11433
11434 prot = src_entry->protection & ~VM_PROT_WRITE;
11435
11436 if (override_nx(map, src_entry->alias) && prot)
11437 prot |= VM_PROT_EXECUTE;
11438
11439 vm_object_pmap_protect(object,
11440 offset,
11441 entry_size,
11442 ((src_entry->is_shared
11443 || map->mapped) ?
11444 PMAP_NULL : map->pmap),
11445 src_entry->vme_start,
11446 prot);
11447
11448 src_entry->needs_copy = TRUE;
11449 }
11450 /*
11451 * Throw away the old object reference of the new entry.
11452 */
11453 vm_object_deallocate(object);
11454
11455 } else {
11456 new_entry->is_shared = FALSE;
11457
11458 /*
11459 * The map can be safely unlocked since we
11460 * already hold a reference on the object.
11461 *
11462 * Record the timestamp of the map for later
11463 * verification, and unlock the map.
11464 */
11465 version.main_timestamp = map->timestamp;
11466 vm_map_unlock(map); /* Increments timestamp once! */
11467
11468 /*
11469 * Perform the copy.
11470 */
11471 if (src_entry->wired_count > 0) {
11472 vm_object_lock(object);
11473 result = vm_object_copy_slowly(
11474 object,
11475 offset,
11476 entry_size,
11477 THREAD_UNINT,
11478 &new_entry->object.vm_object);
11479
11480 new_entry->offset = 0;
11481 new_entry->needs_copy = FALSE;
11482 } else {
11483 result = vm_object_copy_strategically(
11484 object,
11485 offset,
11486 entry_size,
11487 &new_entry->object.vm_object,
11488 &new_entry->offset,
11489 &new_entry_needs_copy);
11490
11491 new_entry->needs_copy = new_entry_needs_copy;
11492 }
11493
11494 /*
11495 * Throw away the old object reference of the new entry.
11496 */
11497 vm_object_deallocate(object);
11498
11499 if (result != KERN_SUCCESS &&
11500 result != KERN_MEMORY_RESTART_COPY) {
11501 _vm_map_entry_dispose(map_header, new_entry);
11502 break;
11503 }
11504
11505 /*
11506 * Verify that the map has not substantially
11507 * changed while the copy was being made.
11508 */
11509
11510 vm_map_lock(map);
11511 if (version.main_timestamp + 1 != map->timestamp) {
11512 /*
11513 * Simple version comparison failed.
11514 *
11515 * Retry the lookup and verify that the
11516 * same object/offset are still present.
11517 */
11518 vm_object_deallocate(new_entry->
11519 object.vm_object);
11520 _vm_map_entry_dispose(map_header, new_entry);
11521 if (result == KERN_MEMORY_RESTART_COPY)
11522 result = KERN_SUCCESS;
11523 continue;
11524 }
11525
11526 if (result == KERN_MEMORY_RESTART_COPY) {
11527 vm_object_reference(object);
11528 goto RestartCopy;
11529 }
11530 }
11531
11532 _vm_map_store_entry_link(map_header,
11533 map_header->links.prev, new_entry);
11534
11535 /* Protections for submap mapping are irrelevant here */
11536 if( !src_entry->is_sub_map ) {
11537 *cur_protection &= src_entry->protection;
11538 *max_protection &= src_entry->max_protection;
11539 }
11540 map_address += tmp_size;
11541 mapped_size += tmp_size;
11542 src_start += tmp_size;
11543
11544 } /* end while */
11545
11546 vm_map_unlock(map);
11547 if (result != KERN_SUCCESS) {
11548 /*
11549 * Free all allocated elements.
11550 */
11551 for (src_entry = map_header->links.next;
11552 src_entry != (struct vm_map_entry *)&map_header->links;
11553 src_entry = new_entry) {
11554 new_entry = src_entry->vme_next;
11555 _vm_map_store_entry_unlink(map_header, src_entry);
11556 vm_object_deallocate(src_entry->object.vm_object);
11557 _vm_map_entry_dispose(map_header, src_entry);
11558 }
11559 }
11560 return result;
11561 }
11562
11563 /*
11564 * Routine: vm_remap
11565 *
11566 * Map portion of a task's address space.
11567 * Mapped region must not overlap more than
11568 * one vm memory object. Protections and
11569 * inheritance attributes remain the same
11570 * as in the original task and are out parameters.
11571 * Source and target tasks can be identical.
11572 * Other attributes are the same as for vm_map().
11573 */
11574 kern_return_t
11575 vm_map_remap(
11576 vm_map_t target_map,
11577 vm_map_address_t *address,
11578 vm_map_size_t size,
11579 vm_map_offset_t mask,
11580 int flags,
11581 vm_map_t src_map,
11582 vm_map_offset_t memory_address,
11583 boolean_t copy,
11584 vm_prot_t *cur_protection,
11585 vm_prot_t *max_protection,
11586 vm_inherit_t inheritance)
11587 {
11588 kern_return_t result;
11589 vm_map_entry_t entry;
11590 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
11591 vm_map_entry_t new_entry;
11592 struct vm_map_header map_header;
11593
11594 if (target_map == VM_MAP_NULL)
11595 return KERN_INVALID_ARGUMENT;
11596
11597 switch (inheritance) {
11598 case VM_INHERIT_NONE:
11599 case VM_INHERIT_COPY:
11600 case VM_INHERIT_SHARE:
11601 if (size != 0 && src_map != VM_MAP_NULL)
11602 break;
11603 /*FALL THRU*/
11604 default:
11605 return KERN_INVALID_ARGUMENT;
11606 }
11607
11608 size = vm_map_round_page(size);
11609
11610 result = vm_map_remap_extract(src_map, memory_address,
11611 size, copy, &map_header,
11612 cur_protection,
11613 max_protection,
11614 inheritance,
11615 target_map->hdr.
11616 entries_pageable);
11617
11618 if (result != KERN_SUCCESS) {
11619 return result;
11620 }
11621
11622 /*
11623 * Allocate/check a range of free virtual address
11624 * space for the target
11625 */
11626 *address = vm_map_trunc_page(*address);
11627 vm_map_lock(target_map);
11628 result = vm_map_remap_range_allocate(target_map, address, size,
11629 mask, flags, &insp_entry);
11630
11631 for (entry = map_header.links.next;
11632 entry != (struct vm_map_entry *)&map_header.links;
11633 entry = new_entry) {
11634 new_entry = entry->vme_next;
11635 _vm_map_store_entry_unlink(&map_header, entry);
11636 if (result == KERN_SUCCESS) {
11637 entry->vme_start += *address;
11638 entry->vme_end += *address;
11639 vm_map_store_entry_link(target_map, insp_entry, entry);
11640 insp_entry = entry;
11641 } else {
11642 if (!entry->is_sub_map) {
11643 vm_object_deallocate(entry->object.vm_object);
11644 } else {
11645 vm_map_deallocate(entry->object.sub_map);
11646 }
11647 _vm_map_entry_dispose(&map_header, entry);
11648 }
11649 }
11650
11651 if( target_map->disable_vmentry_reuse == TRUE) {
11652 if( target_map->highest_entry_end < insp_entry->vme_end ){
11653 target_map->highest_entry_end = insp_entry->vme_end;
11654 }
11655 }
11656
11657 if (result == KERN_SUCCESS) {
11658 target_map->size += size;
11659 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
11660 }
11661 vm_map_unlock(target_map);
11662
11663 if (result == KERN_SUCCESS && target_map->wiring_required)
11664 result = vm_map_wire(target_map, *address,
11665 *address + size, *cur_protection, TRUE);
11666 return result;
11667 }
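/*
 * Minimal usage sketch: share a range of one map into another at a
 * kernel-chosen address.  Names prefixed "example_" are hypothetical.
 */
#if 0	/* illustrative sketch only */
static kern_return_t
example_share_range(vm_map_t example_dst,
                    vm_map_t example_src,
                    vm_map_offset_t example_src_addr,
                    vm_map_size_t example_size,
                    vm_map_address_t *example_dst_addr)	/* OUT */
{
        vm_prot_t example_cur, example_max;

        *example_dst_addr = 0;
        return vm_map_remap(example_dst, example_dst_addr, example_size,
                            (vm_map_offset_t) 0,	/* no alignment mask */
                            VM_FLAGS_ANYWHERE,
                            example_src, example_src_addr,
                            FALSE,			/* share, don't copy */
                            &example_cur, &example_max,
                            VM_INHERIT_SHARE);
}
#endif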
11668
11669 /*
11670 * Routine: vm_map_remap_range_allocate
11671 *
11672 * Description:
11673 * Allocate a range in the specified virtual address map.
11674 * Returns the address and the map entry just before the allocated
11675 * range.
11676 *
11677 * Map must be locked.
11678 */
11679
11680 static kern_return_t
11681 vm_map_remap_range_allocate(
11682 vm_map_t map,
11683 vm_map_address_t *address, /* IN/OUT */
11684 vm_map_size_t size,
11685 vm_map_offset_t mask,
11686 int flags,
11687 vm_map_entry_t *map_entry) /* OUT */
11688 {
11689 vm_map_entry_t entry;
11690 vm_map_offset_t start;
11691 vm_map_offset_t end;
11692 kern_return_t kr;
11693
11694 StartAgain: ;
11695
11696 start = *address;
11697
11698 if (flags & VM_FLAGS_ANYWHERE)
11699 {
11700 /*
11701 * Calculate the first possible address.
11702 */
11703
11704 if (start < map->min_offset)
11705 start = map->min_offset;
11706 if (start > map->max_offset)
11707 return(KERN_NO_SPACE);
11708
11709 /*
11710 * Look for the first possible address;
11711 * if there's already something at this
11712 * address, we have to start after it.
11713 */
11714
11715 if( map->disable_vmentry_reuse == TRUE) {
11716 VM_MAP_HIGHEST_ENTRY(map, entry, start);
11717 } else {
11718 assert(first_free_is_valid(map));
11719 if (start == map->min_offset) {
11720 if ((entry = map->first_free) != vm_map_to_entry(map))
11721 start = entry->vme_end;
11722 } else {
11723 vm_map_entry_t tmp_entry;
11724 if (vm_map_lookup_entry(map, start, &tmp_entry))
11725 start = tmp_entry->vme_end;
11726 entry = tmp_entry;
11727 }
11728 }
11729
11730 /*
11731 * In any case, the "entry" always precedes
11732 * the proposed new region throughout the
11733 * loop:
11734 */
11735
11736 while (TRUE) {
11737 register vm_map_entry_t next;
11738
11739 /*
11740 * Find the end of the proposed new region.
11741 * Be sure we didn't go beyond the end, or
11742 * wrap around the address.
11743 */
11744
11745 end = ((start + mask) & ~mask);
11746 if (end < start)
11747 return(KERN_NO_SPACE);
11748 start = end;
11749 end += size;
11750
11751 if ((end > map->max_offset) || (end < start)) {
11752 if (map->wait_for_space) {
11753 if (size <= (map->max_offset -
11754 map->min_offset)) {
11755 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
11756 vm_map_unlock(map);
11757 thread_block(THREAD_CONTINUE_NULL);
11758 vm_map_lock(map);
11759 goto StartAgain;
11760 }
11761 }
11762
11763 return(KERN_NO_SPACE);
11764 }
11765
11766 /*
11767 * If there are no more entries, we must win.
11768 */
11769
11770 next = entry->vme_next;
11771 if (next == vm_map_to_entry(map))
11772 break;
11773
11774 /*
11775 * If there is another entry, it must be
11776 * after the end of the potential new region.
11777 */
11778
11779 if (next->vme_start >= end)
11780 break;
11781
11782 /*
11783 * Didn't fit -- move to the next entry.
11784 */
11785
11786 entry = next;
11787 start = entry->vme_end;
11788 }
11789 *address = start;
11790 } else {
11791 vm_map_entry_t temp_entry;
11792
11793 /*
11794 * Verify that:
11795 * the address doesn't itself violate
11796 * the mask requirement.
11797 */
11798
11799 if ((start & mask) != 0)
11800 return(KERN_NO_SPACE);
11801
11802
11803 /*
11804 * ... the address is within bounds
11805 */
11806
11807 end = start + size;
11808
11809 if ((start < map->min_offset) ||
11810 (end > map->max_offset) ||
11811 (start >= end)) {
11812 return(KERN_INVALID_ADDRESS);
11813 }
11814
11815 /*
11816 * If we're asked to overwrite whatever was mapped in that
11817 * range, first deallocate that range.
11818 */
11819 if (flags & VM_FLAGS_OVERWRITE) {
11820 vm_map_t zap_map;
11821
11822 /*
11823 * We use a "zap_map" to avoid having to unlock
11824 * the "map" in vm_map_delete(), which would compromise
11825 * the atomicity of the "deallocate" and then "remap"
11826 * combination.
11827 */
11828 zap_map = vm_map_create(PMAP_NULL,
11829 start,
11830 end - start,
11831 map->hdr.entries_pageable);
11832 if (zap_map == VM_MAP_NULL) {
11833 return KERN_RESOURCE_SHORTAGE;
11834 }
11835
11836 kr = vm_map_delete(map, start, end,
11837 VM_MAP_REMOVE_SAVE_ENTRIES,
11838 zap_map);
11839 if (kr == KERN_SUCCESS) {
11840 vm_map_destroy(zap_map,
11841 VM_MAP_REMOVE_NO_PMAP_CLEANUP);
11842 zap_map = VM_MAP_NULL;
11843 }
11844 }
11845
11846 /*
11847 * ... the starting address isn't allocated
11848 */
11849
11850 if (vm_map_lookup_entry(map, start, &temp_entry))
11851 return(KERN_NO_SPACE);
11852
11853 entry = temp_entry;
11854
11855 /*
11856 * ... the next region doesn't overlap the
11857 * end point.
11858 */
11859
11860 if ((entry->vme_next != vm_map_to_entry(map)) &&
11861 (entry->vme_next->vme_start < end))
11862 return(KERN_NO_SPACE);
11863 }
11864 *map_entry = entry;
11865 return(KERN_SUCCESS);
11866 }
11867
11868 /*
11869 * vm_map_switch:
11870 *
11871 * Set the address map for the current thread to the specified map
11872 */
11873
11874 vm_map_t
11875 vm_map_switch(
11876 vm_map_t map)
11877 {
11878 int mycpu;
11879 thread_t thread = current_thread();
11880 vm_map_t oldmap = thread->map;
11881
11882 mp_disable_preemption();
11883 mycpu = cpu_number();
11884
11885 /*
11886 * Deactivate the current map and activate the requested map
11887 */
11888 PMAP_SWITCH_USER(thread, map, mycpu);
11889
11890 mp_enable_preemption();
11891 return(oldmap);
11892 }
11893
11894
11895 /*
11896 * Routine: vm_map_write_user
11897 *
11898 * Description:
11899 * Copy out data from a kernel space into space in the
11900 * destination map. The space must already exist in the
11901 * destination map.
11902 * NOTE: This routine should only be called by threads
11903 * which can block on a page fault, i.e. kernel-mode user
11904 * threads.
11905 *
11906 */
11907 kern_return_t
11908 vm_map_write_user(
11909 vm_map_t map,
11910 void *src_p,
11911 vm_map_address_t dst_addr,
11912 vm_size_t size)
11913 {
11914 kern_return_t kr = KERN_SUCCESS;
11915
11916 if(current_map() == map) {
11917 if (copyout(src_p, dst_addr, size)) {
11918 kr = KERN_INVALID_ADDRESS;
11919 }
11920 } else {
11921 vm_map_t oldmap;
11922
11923 /* take on the identity of the target map while doing */
11924 /* the transfer */
11925
11926 vm_map_reference(map);
11927 oldmap = vm_map_switch(map);
11928 if (copyout(src_p, dst_addr, size)) {
11929 kr = KERN_INVALID_ADDRESS;
11930 }
11931 vm_map_switch(oldmap);
11932 vm_map_deallocate(map);
11933 }
11934 return kr;
11935 }
11936
11937 /*
11938 * Routine: vm_map_read_user
11939 *
11940 * Description:
11941 * Copy in data from a user space source map into the
11942 * kernel map. The space must already exist in the
11943 * kernel map.
11944 * NOTE: This routine should only be called by threads
11945 * which can block on a page fault, i.e. kernel-mode user
11946 * threads.
11947 *
11948 */
11949 kern_return_t
11950 vm_map_read_user(
11951 vm_map_t map,
11952 vm_map_address_t src_addr,
11953 void *dst_p,
11954 vm_size_t size)
11955 {
11956 kern_return_t kr = KERN_SUCCESS;
11957
11958 if(current_map() == map) {
11959 if (copyin(src_addr, dst_p, size)) {
11960 kr = KERN_INVALID_ADDRESS;
11961 }
11962 } else {
11963 vm_map_t oldmap;
11964
11965 /* take on the identity of the target map while doing */
11966 /* the transfer */
11967
11968 vm_map_reference(map);
11969 oldmap = vm_map_switch(map);
11970 if (copyin(src_addr, dst_p, size)) {
11971 kr = KERN_INVALID_ADDRESS;
11972 }
11973 vm_map_switch(oldmap);
11974 vm_map_deallocate(map);
11975 }
11976 return kr;
11977 }
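/*
 * Minimal usage sketch: push a kernel buffer into another task's address
 * space.  The destination range must already exist in that map and the
 * calling thread must be able to block on a fault.  Names prefixed
 * "example_" are hypothetical.
 */
#if 0	/* illustrative sketch only */
static kern_return_t
example_copy_to_task(vm_map_t example_task_map,
                     vm_map_address_t example_user_addr)
{
        char example_buf[64] = { 0 };

        return vm_map_write_user(example_task_map, example_buf,
                                 example_user_addr, sizeof (example_buf));
}
#endif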
11978
11979
11980 /*
11981 * vm_map_check_protection:
11982 *
11983 * Assert that the target map allows the specified
11984 * privilege on the entire address region given.
11985 * The entire region must be allocated.
11986 */
11987 boolean_t
11988 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
11989 vm_map_offset_t end, vm_prot_t protection)
11990 {
11991 vm_map_entry_t entry;
11992 vm_map_entry_t tmp_entry;
11993
11994 vm_map_lock(map);
11995
11996 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
11997 {
11998 vm_map_unlock(map);
11999 return (FALSE);
12000 }
12001
12002 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
12003 vm_map_unlock(map);
12004 return(FALSE);
12005 }
12006
12007 entry = tmp_entry;
12008
12009 while (start < end) {
12010 if (entry == vm_map_to_entry(map)) {
12011 vm_map_unlock(map);
12012 return(FALSE);
12013 }
12014
12015 /*
12016 * No holes allowed!
12017 */
12018
12019 if (start < entry->vme_start) {
12020 vm_map_unlock(map);
12021 return(FALSE);
12022 }
12023
12024 /*
12025 * Check protection associated with entry.
12026 */
12027
12028 if ((entry->protection & protection) != protection) {
12029 vm_map_unlock(map);
12030 return(FALSE);
12031 }
12032
12033 /* go to next entry */
12034
12035 start = entry->vme_end;
12036 entry = entry->vme_next;
12037 }
12038 vm_map_unlock(map);
12039 return(TRUE);
12040 }
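/*
 * Minimal usage sketch: make sure an entire range is mapped with at least
 * read/write permission before operating on it.  Names prefixed
 * "example_" are hypothetical.
 */
#if 0	/* illustrative sketch only */
static boolean_t
example_range_is_writable(vm_map_t example_map,
                          vm_map_offset_t example_start,
                          vm_map_offset_t example_end)
{
        /* FALSE if the range has a hole or weaker protection anywhere */
        return vm_map_check_protection(example_map, example_start,
                                       example_end,
                                       VM_PROT_READ | VM_PROT_WRITE);
}
#endif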
12041
12042 kern_return_t
12043 vm_map_purgable_control(
12044 vm_map_t map,
12045 vm_map_offset_t address,
12046 vm_purgable_t control,
12047 int *state)
12048 {
12049 vm_map_entry_t entry;
12050 vm_object_t object;
12051 kern_return_t kr;
12052
12053 /*
12054 * Vet all the input parameters and current type and state of the
12055 * underlying object. Return with an error if anything is amiss.
12056 */
12057 if (map == VM_MAP_NULL)
12058 return(KERN_INVALID_ARGUMENT);
12059
12060 if (control != VM_PURGABLE_SET_STATE &&
12061 control != VM_PURGABLE_GET_STATE &&
12062 control != VM_PURGABLE_PURGE_ALL)
12063 return(KERN_INVALID_ARGUMENT);
12064
12065 if (control == VM_PURGABLE_PURGE_ALL) {
12066 vm_purgeable_object_purge_all();
12067 return KERN_SUCCESS;
12068 }
12069
12070 if (control == VM_PURGABLE_SET_STATE &&
12071 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
12072 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
12073 return(KERN_INVALID_ARGUMENT);
12074
12075 vm_map_lock_read(map);
12076
12077 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
12078
12079 /*
12080 * Must pass a valid non-submap address.
12081 */
12082 vm_map_unlock_read(map);
12083 return(KERN_INVALID_ADDRESS);
12084 }
12085
12086 if ((entry->protection & VM_PROT_WRITE) == 0) {
12087 /*
12088 * Can't apply purgable controls to something you can't write.
12089 */
12090 vm_map_unlock_read(map);
12091 return(KERN_PROTECTION_FAILURE);
12092 }
12093
12094 object = entry->object.vm_object;
12095 if (object == VM_OBJECT_NULL) {
12096 /*
12097 * Object must already be present or it can't be purgable.
12098 */
12099 vm_map_unlock_read(map);
12100 return KERN_INVALID_ARGUMENT;
12101 }
12102
12103 vm_object_lock(object);
12104
12105 if (entry->offset != 0 ||
12106 entry->vme_end - entry->vme_start != object->vo_size) {
12107 /*
12108 * Can only apply purgable controls to the whole (existing)
12109 * object at once.
12110 */
12111 vm_map_unlock_read(map);
12112 vm_object_unlock(object);
12113 return KERN_INVALID_ARGUMENT;
12114 }
12115
12116 vm_map_unlock_read(map);
12117
12118 kr = vm_object_purgable_control(object, control, state);
12119
12120 vm_object_unlock(object);
12121
12122 return kr;
12123 }
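/*
 * Illustrative sketch (editorial example, not part of the original file):
 * marking the purgeable object mapped at "address" volatile so its pages
 * may be reclaimed under memory pressure.  Note that the control applies
 * to the whole object and the entry must be writable, per the checks above.
 */
static kern_return_t
example_make_volatile(
	vm_map_t	map,
	vm_map_offset_t	address)
{
	int state = VM_PURGABLE_VOLATILE;

	return vm_map_purgable_control(map, address,
				       VM_PURGABLE_SET_STATE, &state);
}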
12124
12125 kern_return_t
12126 vm_map_page_query_internal(
12127 vm_map_t target_map,
12128 vm_map_offset_t offset,
12129 int *disposition,
12130 int *ref_count)
12131 {
12132 kern_return_t kr;
12133 vm_page_info_basic_data_t info;
12134 mach_msg_type_number_t count;
12135
12136 count = VM_PAGE_INFO_BASIC_COUNT;
12137 kr = vm_map_page_info(target_map,
12138 offset,
12139 VM_PAGE_INFO_BASIC,
12140 (vm_page_info_t) &info,
12141 &count);
12142 if (kr == KERN_SUCCESS) {
12143 *disposition = info.disposition;
12144 *ref_count = info.ref_count;
12145 } else {
12146 *disposition = 0;
12147 *ref_count = 0;
12148 }
12149
12150 return kr;
12151 }
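/*
 * Illustrative sketch (editorial example, not part of the original file):
 * using vm_map_page_query_internal() to ask whether the page backing a
 * given offset is resident and dirty.  The out-parameters mirror the
 * basic-info disposition and ref_count filled in above.
 */
static boolean_t
example_page_is_resident_and_dirty(
	vm_map_t	map,
	vm_map_offset_t	offset)
{
	int disposition = 0;
	int ref_count = 0;

	if (vm_map_page_query_internal(map, offset,
				       &disposition, &ref_count) != KERN_SUCCESS)
		return FALSE;

	return ((disposition & VM_PAGE_QUERY_PAGE_PRESENT) &&
		(disposition & VM_PAGE_QUERY_PAGE_DIRTY)) ? TRUE : FALSE;
}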
12152
12153 kern_return_t
12154 vm_map_page_info(
12155 vm_map_t map,
12156 vm_map_offset_t offset,
12157 vm_page_info_flavor_t flavor,
12158 vm_page_info_t info,
12159 mach_msg_type_number_t *count)
12160 {
12161 vm_map_entry_t map_entry;
12162 vm_object_t object;
12163 vm_page_t m;
12164 kern_return_t kr;
12165 kern_return_t retval = KERN_SUCCESS;
12166 boolean_t top_object;
12167 int disposition;
12168 int ref_count;
12169 vm_object_id_t object_id;
12170 vm_page_info_basic_t basic_info;
12171 int depth;
12172 vm_map_offset_t offset_in_page;
12173
12174 switch (flavor) {
12175 case VM_PAGE_INFO_BASIC:
12176 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
12177 /*
12178 * The "vm_page_info_basic_data" structure was not
12179 * properly padded, so allow the size to be off by
12180 * one to maintain backwards binary compatibility...
12181 */
12182 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1)
12183 return KERN_INVALID_ARGUMENT;
12184 }
12185 break;
12186 default:
12187 return KERN_INVALID_ARGUMENT;
12188 }
12189
12190 disposition = 0;
12191 ref_count = 0;
12192 object_id = 0;
12193 top_object = TRUE;
12194 depth = 0;
12195
12196 retval = KERN_SUCCESS;
12197 offset_in_page = offset & PAGE_MASK;
12198 offset = vm_map_trunc_page(offset);
12199
12200 vm_map_lock_read(map);
12201
12202 /*
12203 * First, find the map entry covering "offset", going down
12204 * submaps if necessary.
12205 */
12206 for (;;) {
12207 if (!vm_map_lookup_entry(map, offset, &map_entry)) {
12208 vm_map_unlock_read(map);
12209 return KERN_INVALID_ADDRESS;
12210 }
12211 /* compute offset from this map entry's start */
12212 offset -= map_entry->vme_start;
12213 /* compute offset into this map entry's object (or submap) */
12214 offset += map_entry->offset;
12215
12216 if (map_entry->is_sub_map) {
12217 vm_map_t sub_map;
12218
12219 sub_map = map_entry->object.sub_map;
12220 vm_map_lock_read(sub_map);
12221 vm_map_unlock_read(map);
12222
12223 map = sub_map;
12224
12225 ref_count = MAX(ref_count, map->ref_count);
12226 continue;
12227 }
12228 break;
12229 }
12230
12231 object = map_entry->object.vm_object;
12232 if (object == VM_OBJECT_NULL) {
12233 /* no object -> no page */
12234 vm_map_unlock_read(map);
12235 goto done;
12236 }
12237
12238 vm_object_lock(object);
12239 vm_map_unlock_read(map);
12240
12241 /*
12242 * Go down the VM object shadow chain until we find the page
12243 * we're looking for.
12244 */
12245 for (;;) {
12246 ref_count = MAX(ref_count, object->ref_count);
12247
12248 m = vm_page_lookup(object, offset);
12249
12250 if (m != VM_PAGE_NULL) {
12251 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
12252 break;
12253 } else {
12254 #if MACH_PAGEMAP
12255 if (object->existence_map) {
12256 if (vm_external_state_get(object->existence_map,
12257 offset) ==
12258 VM_EXTERNAL_STATE_EXISTS) {
12259 /*
12260 * this page has been paged out
12261 */
12262 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
12263 break;
12264 }
12265 } else
12266 #endif
12267 {
12268 if (object->internal &&
12269 object->alive &&
12270 !object->terminating &&
12271 object->pager_ready) {
12272
12273 memory_object_t pager;
12274
12275 vm_object_paging_begin(object);
12276 pager = object->pager;
12277 vm_object_unlock(object);
12278
12279 /*
12280 * Ask the default pager if
12281 * it has this page.
12282 */
12283 kr = memory_object_data_request(
12284 pager,
12285 offset + object->paging_offset,
12286 0, /* just poke the pager */
12287 VM_PROT_READ,
12288 NULL);
12289
12290 vm_object_lock(object);
12291 vm_object_paging_end(object);
12292
12293 if (kr == KERN_SUCCESS) {
12294 /* the default pager has it */
12295 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
12296 break;
12297 }
12298 }
12299 }
12300
12301 if (object->shadow != VM_OBJECT_NULL) {
12302 vm_object_t shadow;
12303
12304 offset += object->vo_shadow_offset;
12305 shadow = object->shadow;
12306
12307 vm_object_lock(shadow);
12308 vm_object_unlock(object);
12309
12310 object = shadow;
12311 top_object = FALSE;
12312 depth++;
12313 } else {
12314 // if (!object->internal)
12315 // break;
12316 // retval = KERN_FAILURE;
12317 // goto done_with_object;
12318 break;
12319 }
12320 }
12321 }
12322 /* The ref_count is not strictly accurate: it measures the number */
12323 /* of entities holding a ref on the object; they may not be mapping */
12324 /* the object, or may not be mapping the section holding the */
12325 /* target page.  It is still a ballpark number, and although it */
12326 /* over-counts, it does pick up the copy-on-write cases. */
12327
12328 /* We could also get a picture of page sharing from pmap_attributes, */
12329 /* but that would under-count, since only faulted-in mappings would */
12330 /* show up. */
12331
12332 if (top_object == TRUE && object->shadow)
12333 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
12334
12335 if (! object->internal)
12336 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
12337
12338 if (m == VM_PAGE_NULL)
12339 goto done_with_object;
12340
12341 if (m->fictitious) {
12342 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
12343 goto done_with_object;
12344 }
12345 if (m->dirty || pmap_is_modified(m->phys_page))
12346 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
12347
12348 if (m->reference || pmap_is_referenced(m->phys_page))
12349 disposition |= VM_PAGE_QUERY_PAGE_REF;
12350
12351 if (m->speculative)
12352 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
12353
12354 if (m->cs_validated)
12355 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
12356 if (m->cs_tainted)
12357 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
12358
12359 done_with_object:
12360 vm_object_unlock(object);
12361 done:
12362
12363 switch (flavor) {
12364 case VM_PAGE_INFO_BASIC:
12365 basic_info = (vm_page_info_basic_t) info;
12366 basic_info->disposition = disposition;
12367 basic_info->ref_count = ref_count;
12368 basic_info->object_id = (vm_object_id_t) (uintptr_t) object;
12369 basic_info->offset =
12370 (memory_object_offset_t) offset + offset_in_page;
12371 basic_info->depth = depth;
12372 break;
12373 }
12374
12375 return retval;
12376 }
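/*
 * Illustrative sketch (editorial example, not part of the original file):
 * calling vm_map_page_info() directly with the VM_PAGE_INFO_BASIC flavor.
 * The count must be VM_PAGE_INFO_BASIC_COUNT (or that value minus one,
 * for the backwards-compatibility case noted above).
 */
static kern_return_t
example_basic_page_info(
	vm_map_t			map,
	vm_map_offset_t			offset,
	vm_page_info_basic_data_t	*info_p)
{
	mach_msg_type_number_t count = VM_PAGE_INFO_BASIC_COUNT;

	return vm_map_page_info(map, offset, VM_PAGE_INFO_BASIC,
				(vm_page_info_t) info_p, &count);
}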
12377
12378 /*
12379 * vm_map_msync
12380 *
12381 * Synchronizes the specified memory range with its backing store
12382 * image by either flushing or cleaning the contents to the appropriate
12383 * memory manager, engaging in a memory object synchronize dialog with
12384 * that manager. The client doesn't return until the manager issues
12385 * an m_o_s_completed message. MIG magically converts the user task
12386 * parameter to the task's address map.
12387 *
12388 * interpretation of sync_flags
12389 * VM_SYNC_INVALIDATE - discard pages, only return precious
12390 * pages to manager.
12391 *
12392 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
12393 * - discard pages, write dirty or precious
12394 * pages back to memory manager.
12395 *
12396 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
12397 * - write dirty or precious pages back to
12398 * the memory manager.
12399 *
12400 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
12401 * is a hole in the region, and we would
12402 * have returned KERN_SUCCESS, return
12403 * KERN_INVALID_ADDRESS instead.
12404 *
12405 * NOTE
12406 * The memory object attributes have not yet been implemented; this
12407 * function will have to deal with the invalidate attribute.
12408 *
12409 * RETURNS
12410 * KERN_INVALID_TASK Bad task parameter
12411 * KERN_INVALID_ARGUMENT both sync and async were specified.
12412 * KERN_SUCCESS The usual.
12413 * KERN_INVALID_ADDRESS There was a hole in the region.
12414 */
12415
12416 kern_return_t
12417 vm_map_msync(
12418 vm_map_t map,
12419 vm_map_address_t address,
12420 vm_map_size_t size,
12421 vm_sync_t sync_flags)
12422 {
12423 msync_req_t msr;
12424 msync_req_t new_msr;
12425 queue_chain_t req_q; /* queue of requests for this msync */
12426 vm_map_entry_t entry;
12427 vm_map_size_t amount_left;
12428 vm_object_offset_t offset;
12429 boolean_t do_sync_req;
12430 boolean_t had_hole = FALSE;
12431 memory_object_t pager;
12432
12433 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
12434 (sync_flags & VM_SYNC_SYNCHRONOUS))
12435 return(KERN_INVALID_ARGUMENT);
12436
12437 /*
12438 * align address and size on page boundaries
12439 */
12440 size = vm_map_round_page(address + size) - vm_map_trunc_page(address);
12441 address = vm_map_trunc_page(address);
12442
12443 if (map == VM_MAP_NULL)
12444 return(KERN_INVALID_TASK);
12445
12446 if (size == 0)
12447 return(KERN_SUCCESS);
12448
12449 queue_init(&req_q);
12450 amount_left = size;
12451
12452 while (amount_left > 0) {
12453 vm_object_size_t flush_size;
12454 vm_object_t object;
12455
12456 vm_map_lock(map);
12457 if (!vm_map_lookup_entry(map,
12458 vm_map_trunc_page(address), &entry)) {
12459
12460 vm_map_size_t skip;
12461
12462 /*
12463 * hole in the address map.
12464 */
12465 had_hole = TRUE;
12466
12467 /*
12468 * Check for empty map.
12469 */
12470 if (entry == vm_map_to_entry(map) &&
12471 entry->vme_next == entry) {
12472 vm_map_unlock(map);
12473 break;
12474 }
12475 /*
12476 * Check that we don't wrap and that
12477 * we have at least one real map entry.
12478 */
12479 if ((map->hdr.nentries == 0) ||
12480 (entry->vme_next->vme_start < address)) {
12481 vm_map_unlock(map);
12482 break;
12483 }
12484 /*
12485 * Move up to the next entry if needed
12486 */
12487 skip = (entry->vme_next->vme_start - address);
12488 if (skip >= amount_left)
12489 amount_left = 0;
12490 else
12491 amount_left -= skip;
12492 address = entry->vme_next->vme_start;
12493 vm_map_unlock(map);
12494 continue;
12495 }
12496
12497 offset = address - entry->vme_start;
12498
12499 /*
12500 * do we have more to flush than is contained in this
12501 * entry?
12502 */
12503 if (amount_left + entry->vme_start + offset > entry->vme_end) {
12504 flush_size = entry->vme_end -
12505 (entry->vme_start + offset);
12506 } else {
12507 flush_size = amount_left;
12508 }
12509 amount_left -= flush_size;
12510 address += flush_size;
12511
12512 if (entry->is_sub_map == TRUE) {
12513 vm_map_t local_map;
12514 vm_map_offset_t local_offset;
12515
12516 local_map = entry->object.sub_map;
12517 local_offset = entry->offset;
12518 vm_map_unlock(map);
12519 if (vm_map_msync(
12520 local_map,
12521 local_offset,
12522 flush_size,
12523 sync_flags) == KERN_INVALID_ADDRESS) {
12524 had_hole = TRUE;
12525 }
12526 continue;
12527 }
12528 object = entry->object.vm_object;
12529
12530 /*
12531 * We can't sync this object if the object has not been
12532 * created yet
12533 */
12534 if (object == VM_OBJECT_NULL) {
12535 vm_map_unlock(map);
12536 continue;
12537 }
12538 offset += entry->offset;
12539
12540 vm_object_lock(object);
12541
12542 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
12543 int kill_pages = 0;
12544 boolean_t reusable_pages = FALSE;
12545
12546 if (sync_flags & VM_SYNC_KILLPAGES) {
12547 if (object->ref_count == 1 && !object->shadow)
12548 kill_pages = 1;
12549 else
12550 kill_pages = -1;
12551 }
12552 if (kill_pages != -1)
12553 vm_object_deactivate_pages(object, offset,
12554 (vm_object_size_t)flush_size, kill_pages, reusable_pages);
12555 vm_object_unlock(object);
12556 vm_map_unlock(map);
12557 continue;
12558 }
12559 /*
12560 * We can't sync this object if there isn't a pager.
12561 * Don't bother to sync internal objects, since there can't
12562 * be any "permanent" storage for these objects anyway.
12563 */
12564 if ((object->pager == MEMORY_OBJECT_NULL) ||
12565 (object->internal) || (object->private)) {
12566 vm_object_unlock(object);
12567 vm_map_unlock(map);
12568 continue;
12569 }
12570 /*
12571 * keep reference on the object until syncing is done
12572 */
12573 vm_object_reference_locked(object);
12574 vm_object_unlock(object);
12575
12576 vm_map_unlock(map);
12577
12578 do_sync_req = vm_object_sync(object,
12579 offset,
12580 flush_size,
12581 sync_flags & VM_SYNC_INVALIDATE,
12582 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
12583 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
12584 sync_flags & VM_SYNC_SYNCHRONOUS);
12585 /*
12586 * only send an m_o_s if we returned pages or if the entry
12587 * is writable (i.e. dirty pages may have already been sent back)
12588 */
12589 if (!do_sync_req) {
12590 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
12591 /*
12592 * clear out the clustering and read-ahead hints
12593 */
12594 vm_object_lock(object);
12595
12596 object->pages_created = 0;
12597 object->pages_used = 0;
12598 object->sequential = 0;
12599 object->last_alloc = 0;
12600
12601 vm_object_unlock(object);
12602 }
12603 vm_object_deallocate(object);
12604 continue;
12605 }
12606 msync_req_alloc(new_msr);
12607
12608 vm_object_lock(object);
12609 offset += object->paging_offset;
12610
12611 new_msr->offset = offset;
12612 new_msr->length = flush_size;
12613 new_msr->object = object;
12614 new_msr->flag = VM_MSYNC_SYNCHRONIZING;
12615 re_iterate:
12616
12617 /*
12618 * We can't sync this object if there isn't a pager. The
12619 * pager can disappear anytime we're not holding the object
12620 * lock. So this has to be checked anytime we goto re_iterate.
12621 */
12622
12623 pager = object->pager;
12624
12625 if (pager == MEMORY_OBJECT_NULL) {
12626 vm_object_unlock(object);
12627 vm_object_deallocate(object);
12628 continue;
12629 }
12630
12631 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
12632 /*
12633 * need to check for an overlapping entry; if found, wait
12634 * for the overlapping msr to be done, then reiterate
12635 */
12636 msr_lock(msr);
12637 if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
12638 ((offset >= msr->offset &&
12639 offset < (msr->offset + msr->length)) ||
12640 (msr->offset >= offset &&
12641 msr->offset < (offset + flush_size))))
12642 {
12643 assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
12644 msr_unlock(msr);
12645 vm_object_unlock(object);
12646 thread_block(THREAD_CONTINUE_NULL);
12647 vm_object_lock(object);
12648 goto re_iterate;
12649 }
12650 msr_unlock(msr);
12651 }/* queue_iterate */
12652
12653 queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
12654
12655 vm_object_paging_begin(object);
12656 vm_object_unlock(object);
12657
12658 queue_enter(&req_q, new_msr, msync_req_t, req_q);
12659
12660 (void) memory_object_synchronize(
12661 pager,
12662 offset,
12663 flush_size,
12664 sync_flags & ~VM_SYNC_CONTIGUOUS);
12665
12666 vm_object_lock(object);
12667 vm_object_paging_end(object);
12668 vm_object_unlock(object);
12669 }/* while */
12670
12671 /*
12672 * wait for memory_object_synchronize_completed messages from pager(s)
12673 */
12674
12675 while (!queue_empty(&req_q)) {
12676 msr = (msync_req_t)queue_first(&req_q);
12677 msr_lock(msr);
12678 while(msr->flag != VM_MSYNC_DONE) {
12679 assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
12680 msr_unlock(msr);
12681 thread_block(THREAD_CONTINUE_NULL);
12682 msr_lock(msr);
12683 }/* while */
12684 queue_remove(&req_q, msr, msync_req_t, req_q);
12685 msr_unlock(msr);
12686 vm_object_deallocate(msr->object);
12687 msync_req_free(msr);
12688 }/* while */
12689
12690 /* for proper msync() behaviour */
12691 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
12692 return(KERN_INVALID_ADDRESS);
12693
12694 return(KERN_SUCCESS);
12695 }/* vm_msync */
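/*
 * Illustrative sketch (editorial example, not part of the original file):
 * a synchronous msync of a range, asking that dirty and precious pages be
 * written back and that KERN_INVALID_ADDRESS be returned if the range
 * contains a hole (see the sync_flags interpretation above).
 */
static kern_return_t
example_flush_range(
	vm_map_t		map,
	vm_map_address_t	address,
	vm_map_size_t		size)
{
	return vm_map_msync(map, address, size,
			    VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS);
}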
12696
12697 /*
12698 * Routine: convert_port_entry_to_map
12699 * Purpose:
12700 * Convert from a port specifying an entry or a task
12701 * to a map. Doesn't consume the port ref; produces a map ref,
12702 * which may be null. Unlike convert_port_to_map, the
12703 * port may be backed by either a task or a named entry.
12704 * Conditions:
12705 * Nothing locked.
12706 */
12707
12708
12709 vm_map_t
12710 convert_port_entry_to_map(
12711 ipc_port_t port)
12712 {
12713 vm_map_t map;
12714 vm_named_entry_t named_entry;
12715 uint32_t try_failed_count = 0;
12716
12717 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
12718 while(TRUE) {
12719 ip_lock(port);
12720 if(ip_active(port) && (ip_kotype(port)
12721 == IKOT_NAMED_ENTRY)) {
12722 named_entry =
12723 (vm_named_entry_t)port->ip_kobject;
12724 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
12725 ip_unlock(port);
12726
12727 try_failed_count++;
12728 mutex_pause(try_failed_count);
12729 continue;
12730 }
12731 named_entry->ref_count++;
12732 lck_mtx_unlock(&(named_entry)->Lock);
12733 ip_unlock(port);
12734 if ((named_entry->is_sub_map) &&
12735 (named_entry->protection
12736 & VM_PROT_WRITE)) {
12737 map = named_entry->backing.map;
12738 } else {
12739 mach_destroy_memory_entry(port);
12740 return VM_MAP_NULL;
12741 }
12742 vm_map_reference_swap(map);
12743 mach_destroy_memory_entry(port);
12744 break;
12745 }
12746 else
12747 return VM_MAP_NULL;
12748 }
12749 }
12750 else
12751 map = convert_port_to_map(port);
12752
12753 return map;
12754 }
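/*
 * Illustrative sketch (editorial example, not part of the original file):
 * the conversion produces a map reference (or VM_MAP_NULL), so a caller
 * is expected to drop that reference with vm_map_deallocate() when done.
 */
static void
example_with_port_map(
	ipc_port_t	port)
{
	vm_map_t map;

	map = convert_port_entry_to_map(port);
	if (map == VM_MAP_NULL)
		return;

	/* ... operate on "map" ... */

	vm_map_deallocate(map);	/* drop the reference produced above */
}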
12755
12756 /*
12757 * Routine: convert_port_entry_to_object
12758 * Purpose:
12759 * Convert from a port specifying a named entry to an
12760 * object. Doesn't consume the port ref; produces an object ref,
12761 * which may be null.
12762 * Conditions:
12763 * Nothing locked.
12764 */
12765
12766
12767 vm_object_t
12768 convert_port_entry_to_object(
12769 ipc_port_t port)
12770 {
12771 vm_object_t object;
12772 vm_named_entry_t named_entry;
12773 uint32_t try_failed_count = 0;
12774
12775 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
12776 while(TRUE) {
12777 ip_lock(port);
12778 if(ip_active(port) && (ip_kotype(port)
12779 == IKOT_NAMED_ENTRY)) {
12780 named_entry =
12781 (vm_named_entry_t)port->ip_kobject;
12782 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
12783 ip_unlock(port);
12784
12785 try_failed_count++;
12786 mutex_pause(try_failed_count);
12787 continue;
12788 }
12789 named_entry->ref_count++;
12790 lck_mtx_unlock(&(named_entry)->Lock);
12791 ip_unlock(port);
12792 if ((!named_entry->is_sub_map) &&
12793 (!named_entry->is_pager) &&
12794 (named_entry->protection
12795 & VM_PROT_WRITE)) {
12796 object = named_entry->backing.object;
12797 } else {
12798 mach_destroy_memory_entry(port);
12799 return (vm_object_t)NULL;
12800 }
12801 vm_object_reference(named_entry->backing.object);
12802 mach_destroy_memory_entry(port);
12803 break;
12804 }
12805 else
12806 return (vm_object_t)NULL;
12807 }
12808 } else {
12809 return (vm_object_t)NULL;
12810 }
12811
12812 return object;
12813 }
12814
12815 /*
12816 * Export routines to other components for the things we access locally through
12817 * macros.
12818 */
12819 #undef current_map
12820 vm_map_t
12821 current_map(void)
12822 {
12823 return (current_map_fast());
12824 }
12825
12826 /*
12827 * vm_map_reference:
12828 *
12829 * Most code internal to the osfmk will go through a
12830 * macro defining this. This is always here for the
12831 * use of other kernel components.
12832 */
12833 #undef vm_map_reference
12834 void
12835 vm_map_reference(
12836 register vm_map_t map)
12837 {
12838 if (map == VM_MAP_NULL)
12839 return;
12840
12841 lck_mtx_lock(&map->s_lock);
12842 #if TASK_SWAPPER
12843 assert(map->res_count > 0);
12844 assert(map->ref_count >= map->res_count);
12845 map->res_count++;
12846 #endif
12847 map->ref_count++;
12848 lck_mtx_unlock(&map->s_lock);
12849 }
12850
12851 /*
12852 * vm_map_deallocate:
12853 *
12854 * Removes a reference from the specified map,
12855 * destroying it if no references remain.
12856 * The map should not be locked.
12857 */
12858 void
12859 vm_map_deallocate(
12860 register vm_map_t map)
12861 {
12862 unsigned int ref;
12863
12864 if (map == VM_MAP_NULL)
12865 return;
12866
12867 lck_mtx_lock(&map->s_lock);
12868 ref = --map->ref_count;
12869 if (ref > 0) {
12870 vm_map_res_deallocate(map);
12871 lck_mtx_unlock(&map->s_lock);
12872 return;
12873 }
12874 assert(map->ref_count == 0);
12875 lck_mtx_unlock(&map->s_lock);
12876
12877 #if TASK_SWAPPER
12878 /*
12879 * The map residence count isn't decremented here because
12880 * the vm_map_delete below will traverse the entire map,
12881 * deleting entries, and the residence counts on objects
12882 * and sharing maps will go away then.
12883 */
12884 #endif
12885
12886 vm_map_destroy(map, VM_MAP_NO_FLAGS);
12887 }
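/*
 * Illustrative sketch (editorial example, not part of the original file):
 * the usual pairing of the exported reference routines; every
 * vm_map_reference() taken by an external component should eventually be
 * balanced by a vm_map_deallocate().
 */
static void
example_hold_map_briefly(
	vm_map_t	map)
{
	vm_map_reference(map);		/* take an extra reference */

	/* ... the map cannot be destroyed while it is used here ... */

	vm_map_deallocate(map);		/* drop it; may destroy the map */
}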
12888
12889
12890 void
12891 vm_map_disable_NX(vm_map_t map)
12892 {
12893 if (map == NULL)
12894 return;
12895 if (map->pmap == NULL)
12896 return;
12897
12898 pmap_disable_NX(map->pmap);
12899 }
12900
12901 void
12902 vm_map_disallow_data_exec(vm_map_t map)
12903 {
12904 if (map == NULL)
12905 return;
12906
12907 map->map_disallow_data_exec = TRUE;
12908 }
12909
12910 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
12911 * more descriptive.
12912 */
12913 void
12914 vm_map_set_32bit(vm_map_t map)
12915 {
12916 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
12917 }
12918
12919
12920 void
12921 vm_map_set_64bit(vm_map_t map)
12922 {
12923 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
12924 }
12925
12926 vm_map_offset_t
12927 vm_compute_max_offset(unsigned is64)
12928 {
12929 return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
12930 }
12931
12932 boolean_t
12933 vm_map_is_64bit(
12934 vm_map_t map)
12935 {
12936 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
12937 }
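/*
 * Illustrative sketch (editorial example, not part of the original file):
 * the relationship between the address-size setters above and
 * vm_compute_max_offset()/vm_map_is_64bit().  The helper name is
 * hypothetical; is64 is assumed to be TRUE or FALSE.
 */
static void
example_size_map_for_64bit(
	vm_map_t	map,
	boolean_t	is64)
{
	if (is64)
		vm_map_set_64bit(map);	/* max_offset becomes MACH_VM_MAX_ADDRESS */
	else
		vm_map_set_32bit(map);	/* max_offset becomes VM_MAX_ADDRESS */

	/* vm_compute_max_offset() yields the same limits... */
	assert(map->max_offset == vm_compute_max_offset(is64));
	/* ...and vm_map_is_64bit() reports TRUE only in the 64-bit case */
	assert(vm_map_is_64bit(map) == (is64 ? TRUE : FALSE));
}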
12938
12939 boolean_t
12940 vm_map_has_4GB_pagezero(
12941 vm_map_t map)
12942 {
12943 /*
12944 * XXX FBDP
12945 * We should lock the VM map (for read) here but we can get away
12946 * with it for now because there can't really be any race condition:
12947 * the VM map's min_offset is changed only when the VM map is created
12948 * and when the zero page is established (when the binary gets loaded),
12949 * and this routine gets called only when the task terminates and the
12950 * VM map is being torn down, and when a new map is created via
12951 * load_machfile()/execve().
12952 */
12953 return (map->min_offset >= 0x100000000ULL);
12954 }
12955
12956 void
12957 vm_map_set_4GB_pagezero(vm_map_t map)
12958 {
12959 #if defined(__i386__)
12960 pmap_set_4GB_pagezero(map->pmap);
12961 #else
12962 #pragma unused(map)
12963 #endif
12964
12965 }
12966
12967 void
12968 vm_map_clear_4GB_pagezero(vm_map_t map)
12969 {
12970 #if defined(__i386__)
12971 pmap_clear_4GB_pagezero(map->pmap);
12972 #else
12973 #pragma unused(map)
12974 #endif
12975 }
12976
12977 /*
12978 * Raise a VM map's minimum offset.
12979 * To strictly enforce "page zero" reservation.
12980 */
12981 kern_return_t
12982 vm_map_raise_min_offset(
12983 vm_map_t map,
12984 vm_map_offset_t new_min_offset)
12985 {
12986 vm_map_entry_t first_entry;
12987
12988 new_min_offset = vm_map_round_page(new_min_offset);
12989
12990 vm_map_lock(map);
12991
12992 if (new_min_offset < map->min_offset) {
12993 /*
12994 * Can't move min_offset backwards, as that would expose
12995 * a part of the address space that was previously, and for
12996 * possibly good reasons, inaccessible.
12997 */
12998 vm_map_unlock(map);
12999 return KERN_INVALID_ADDRESS;
13000 }
13001
13002 first_entry = vm_map_first_entry(map);
13003 if (first_entry != vm_map_to_entry(map) &&
13004 first_entry->vme_start < new_min_offset) {
13005 /*
13006 * Some memory was already allocated below the new
13007 * minimum offset. It's too late to change it now...
13008 */
13009 vm_map_unlock(map);
13010 return KERN_NO_SPACE;
13011 }
13012
13013 map->min_offset = new_min_offset;
13014
13015 vm_map_unlock(map);
13016
13017 return KERN_SUCCESS;
13018 }
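/*
 * Illustrative sketch (editorial example, not part of the original file):
 * reserving a one-page "page zero" by raising the map's minimum offset,
 * which only succeeds if nothing is mapped below the new minimum.  The
 * helper name and the size chosen here are hypothetical.
 */
static kern_return_t
example_reserve_pagezero(
	vm_map_t	map)
{
	/* fails with KERN_NO_SPACE if memory already exists below PAGE_SIZE */
	return vm_map_raise_min_offset(map, (vm_map_offset_t) PAGE_SIZE);
}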
13019
13020 /*
13021 * Set the limit on the maximum amount of user wired memory allowed for this map.
13022 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
13023 * the kernel. The limits are checked on the Mach VM side, so we keep a copy here
13024 * to avoid having to reach over to the BSD data structures.
13025 */
13026
13027 void
13028 vm_map_set_user_wire_limit(vm_map_t map,
13029 vm_size_t limit)
13030 {
13031 map->user_wire_limit = limit;
13032 }
13033
13034
13035 void vm_map_switch_protect(vm_map_t map,
13036 boolean_t val)
13037 {
13038 vm_map_lock(map);
13039 map->switch_protect=val;
13040 vm_map_unlock(map);
13041 }
13042
13043 /* Add (generate) code signature for memory range */
13044 #if CONFIG_DYNAMIC_CODE_SIGNING
13045 kern_return_t vm_map_sign(vm_map_t map,
13046 vm_map_offset_t start,
13047 vm_map_offset_t end)
13048 {
13049 vm_map_entry_t entry;
13050 vm_page_t m;
13051 vm_object_t object;
13052
13053 /*
13054 * Vet all the input parameters and current type and state of the
13055 * underlying object. Return with an error if anything is amiss.
13056 */
13057 if (map == VM_MAP_NULL)
13058 return(KERN_INVALID_ARGUMENT);
13059
13060 vm_map_lock_read(map);
13061
13062 if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
13063 /*
13064 * Must pass a valid non-submap address.
13065 */
13066 vm_map_unlock_read(map);
13067 return(KERN_INVALID_ADDRESS);
13068 }
13069
13070 if((entry->vme_start > start) || (entry->vme_end < end)) {
13071 /*
13072 * Map entry doesn't cover the requested range. Not handling
13073 * this situation currently.
13074 */
13075 vm_map_unlock_read(map);
13076 return(KERN_INVALID_ARGUMENT);
13077 }
13078
13079 object = entry->object.vm_object;
13080 if (object == VM_OBJECT_NULL) {
13081 /*
13082 * Object must already be present or we can't sign.
13083 */
13084 vm_map_unlock_read(map);
13085 return KERN_INVALID_ARGUMENT;
13086 }
13087
13088 vm_object_lock(object);
13089 vm_map_unlock_read(map);
13090
13091 while(start < end) {
13092 uint32_t refmod;
13093
13094 m = vm_page_lookup(object, start - entry->vme_start + entry->offset );
13095 if (m==VM_PAGE_NULL) {
13096 /* should we try to fault a page here? we can probably
13097 * demand that it exists and is locked for this request */
13098 vm_object_unlock(object);
13099 return KERN_FAILURE;
13100 }
13101 /* deal with special page status */
13102 if (m->busy ||
13103 (m->unusual && (m->error || m->restart || m->private || m->absent))) {
13104 vm_object_unlock(object);
13105 return KERN_FAILURE;
13106 }
13107
13108 /* Page is OK... now "validate" it */
13109 /* This is the place where we'll call out to create a code
13110 * directory, later */
13111 m->cs_validated = TRUE;
13112
13113 /* The page is now "clean" for codesigning purposes. That means
13114 * we don't consider it as modified (wpmapped) anymore. But
13115 * we'll disconnect the page so we note any future modification
13116 * attempts. */
13117 m->wpmapped = FALSE;
13118 refmod = pmap_disconnect(m->phys_page);
13119
13120 /* Pull the dirty status from the pmap, since we cleared the
13121 * wpmapped bit */
13122 if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
13123 m->dirty = TRUE;
13124 }
13125
13126 /* On to the next page */
13127 start += PAGE_SIZE;
13128 }
13129 vm_object_unlock(object);
13130
13131 return KERN_SUCCESS;
13132 }
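/*
 * Illustrative sketch (editorial example, not part of the original file):
 * signing one page's worth of an existing mapping.  vm_map_sign() requires
 * that a single, already-populated map entry covers the whole range.
 */
static kern_return_t
example_sign_page(
	vm_map_t	map,
	vm_map_offset_t	start)
{
	return vm_map_sign(map, start, start + PAGE_SIZE);
}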
13133 #endif
13134
13135 #if CONFIG_FREEZE
13136
13137 kern_return_t vm_map_freeze_walk(
13138 vm_map_t map,
13139 unsigned int *purgeable_count,
13140 unsigned int *wired_count,
13141 unsigned int *clean_count,
13142 unsigned int *dirty_count,
13143 boolean_t *has_shared)
13144 {
13145 vm_map_entry_t entry;
13146
13147 vm_map_lock_read(map);
13148
13149 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
13150 *has_shared = FALSE;
13151
13152 for (entry = vm_map_first_entry(map);
13153 entry != vm_map_to_entry(map);
13154 entry = entry->vme_next) {
13155 unsigned int purgeable, clean, dirty, wired;
13156 boolean_t shared;
13157
13158 if ((entry->object.vm_object == 0) ||
13159 (entry->is_sub_map) ||
13160 (entry->object.vm_object->phys_contiguous)) {
13161 continue;
13162 }
13163
13164 vm_object_pack(&purgeable, &wired, &clean, &dirty, &shared, entry->object.vm_object, VM_OBJECT_NULL, NULL, NULL);
13165
13166 *purgeable_count += purgeable;
13167 *wired_count += wired;
13168 *clean_count += clean;
13169 *dirty_count += dirty;
13170
13171 if (shared) {
13172 *has_shared = TRUE;
13173 }
13174 }
13175
13176 vm_map_unlock_read(map);
13177
13178 return KERN_SUCCESS;
13179 }
13180
13181 kern_return_t vm_map_freeze(
13182 vm_map_t map,
13183 unsigned int *purgeable_count,
13184 unsigned int *wired_count,
13185 unsigned int *clean_count,
13186 unsigned int *dirty_count,
13187 boolean_t *has_shared)
13188 {
13189 vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
13190 vm_object_t compact_object = VM_OBJECT_NULL;
13191 vm_object_offset_t offset = 0x0;
13192 kern_return_t kr = KERN_SUCCESS;
13193 void *default_freezer_toc = NULL;
13194 boolean_t cleanup = FALSE;
13195
13196 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
13197 *has_shared = FALSE;
13198
13199 /* Create our compact object */
13200 compact_object = vm_object_allocate((vm_map_offset_t)(VM_MAX_ADDRESS) - (vm_map_offset_t)(VM_MIN_ADDRESS));
13201 if (!compact_object) {
13202 kr = KERN_FAILURE;
13203 goto done;
13204 }
13205
13206 default_freezer_toc = default_freezer_mapping_create(compact_object, offset);
13207 if (!default_freezer_toc) {
13208 kr = KERN_FAILURE;
13209 goto done;
13210 }
13211
13212 /*
13213 * We need the exclusive lock here so that we can
13214 * block any page faults or lookups while we are
13215 * in the middle of freezing this vm map.
13216 */
13217 vm_map_lock(map);
13218
13219 if (map->default_freezer_toc != NULL){
13220 /*
13221 * This map has already been frozen.
13222 */
13223 cleanup = TRUE;
13224 kr = KERN_SUCCESS;
13225 goto done;
13226 }
13227
13228 /* Get a mapping in place for the freezing about to commence */
13229 map->default_freezer_toc = default_freezer_toc;
13230
13231 vm_object_lock(compact_object);
13232
13233 for (entry2 = vm_map_first_entry(map);
13234 entry2 != vm_map_to_entry(map);
13235 entry2 = entry2->vme_next) {
13236
13237 vm_object_t src_object = entry2->object.vm_object;
13238
13239 /* If eligible, scan the entry, moving eligible pages over to our parent object */
13240 if (entry2->object.vm_object && !entry2->is_sub_map && !entry2->object.vm_object->phys_contiguous) {
13241 unsigned int purgeable, clean, dirty, wired;
13242 boolean_t shared;
13243
13244 vm_object_pack(&purgeable, &wired, &clean, &dirty, &shared,
13245 src_object, compact_object, &default_freezer_toc, &offset);
13246
13247 *purgeable_count += purgeable;
13248 *wired_count += wired;
13249 *clean_count += clean;
13250 *dirty_count += dirty;
13251
13252 if (shared) {
13253 *has_shared = TRUE;
13254 }
13255 }
13256 }
13257
13258 vm_object_unlock(compact_object);
13259
13260 /* Finally, throw out the pages to swap */
13261 vm_object_pageout(compact_object);
13262
13263 done:
13264 vm_map_unlock(map);
13265
13266 /* Unwind if there was a failure */
13267 if ((cleanup) || (KERN_SUCCESS != kr)) {
13268 if (default_freezer_toc){
13269 default_freezer_mapping_free(&map->default_freezer_toc, TRUE);
13270 }
13271 if (compact_object){
13272 vm_object_deallocate(compact_object);
13273 }
13274 }
13275
13276 return kr;
13277 }
13278
13279 __private_extern__ vm_object_t default_freezer_get_compact_vm_object( void** );
13280
13281 void
13282 vm_map_thaw(
13283 vm_map_t map)
13284 {
13285 void **default_freezer_toc;
13286 vm_object_t compact_object;
13287
13288 vm_map_lock(map);
13289
13290 if (map->default_freezer_toc == NULL){
13291 /*
13292 * This map is not in a frozen state.
13293 */
13294 goto out;
13295 }
13296
13297 default_freezer_toc = &(map->default_freezer_toc);
13298
13299 compact_object = default_freezer_get_compact_vm_object(default_freezer_toc);
13300
13301 /* Bring the pages back in */
13302 vm_object_pagein(compact_object);
13303
13304 /* Shift pages back to their original objects */
13305 vm_object_unpack(compact_object, default_freezer_toc);
13306
13307 vm_object_deallocate(compact_object);
13308
13309 map->default_freezer_toc = NULL;
13310
13311 out:
13312 vm_map_unlock(map);
13313 }
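/*
 * Illustrative sketch (editorial example, not part of the original file):
 * the freeze/thaw pairing.  vm_map_freeze() packs eligible pages into a
 * compact object and pages them out; vm_map_thaw() later brings them back
 * and unpacks them into their original objects.  The helper name is
 * hypothetical.
 */
static kern_return_t
example_freeze_then_thaw(
	vm_map_t	map)
{
	unsigned int purgeable, wired, clean, dirty;
	boolean_t has_shared;
	kern_return_t kr;

	kr = vm_map_freeze(map, &purgeable, &wired,
			   &clean, &dirty, &has_shared);
	if (kr == KERN_SUCCESS)
		vm_map_thaw(map);	/* restore the frozen pages */

	return kr;
}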
13314 #endif
13315
13316 #if !CONFIG_EMBEDDED
13317 /*
13318 * vm_map_entry_should_cow_for_true_share:
13319 *
13320 * Determines if the map entry should be clipped and setup for copy-on-write
13321 * to avoid applying "true_share" to a large VM object when only a subset is
13322 * targeted.
13323 *
13324 * For now, we target only the map entries created for the Objective C
13325 * Garbage Collector, which initially have the following properties:
13326 * - alias == VM_MEMORY_MALLOC
13327 * - wired_count == 0
13328 * - !needs_copy
13329 * and a VM object with:
13330 * - internal
13331 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
13332 * - !true_share
13333 * - vo_size == ANON_CHUNK_SIZE
13334 */
13335 boolean_t
13336 vm_map_entry_should_cow_for_true_share(
13337 vm_map_entry_t entry)
13338 {
13339 vm_object_t object;
13340
13341 if (entry->is_sub_map) {
13342 /* entry does not point at a VM object */
13343 return FALSE;
13344 }
13345
13346 if (entry->needs_copy) {
13347 /* already set for copy_on_write: done! */
13348 return FALSE;
13349 }
13350
13351 if (entry->alias != VM_MEMORY_MALLOC) {
13352 /* not tagged as an Objective-C Garbage Collector entry */
13353 return FALSE;
13354 }
13355
13356 if (entry->wired_count) {
13357 /* wired: can't change the map entry... */
13358 return FALSE;
13359 }
13360
13361 object = entry->object.vm_object;
13362
13363 if (object == VM_OBJECT_NULL) {
13364 /* no object yet... */
13365 return FALSE;
13366 }
13367
13368 if (!object->internal) {
13369 /* not an internal object */
13370 return FALSE;
13371 }
13372
13373 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
13374 /* not the default copy strategy */
13375 return FALSE;
13376 }
13377
13378 if (object->true_share) {
13379 /* already true_share: too late to avoid it */
13380 return FALSE;
13381 }
13382
13383 if (object->vo_size != ANON_CHUNK_SIZE) {
13384 /* not an object created for the ObjC Garbage Collector */
13385 return FALSE;
13386 }
13387
13388 /*
13389 * All the criteria match: we have a large object being targeted for "true_share".
13390 * To limit the adverse side effects of "true_share", tell the caller to
13391 * try to avoid setting up the entire object for "true_share" by clipping the
13392 * targeted range and setting it up for copy-on-write.
13393 */
13394 return TRUE;
13395 }
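/*
 * Illustrative sketch (editorial example, not part of the original file):
 * how a caller holding the map lock might act on this predicate, clipping
 * the entry to the range it actually needs before setting that clipped
 * range up for copy-on-write.  The helper name and start/end parameters
 * are hypothetical.
 */
static void
example_clip_for_true_share(
	vm_map_t	map,
	vm_map_entry_t	entry,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	if (!vm_map_entry_should_cow_for_true_share(entry))
		return;

	/* narrow the entry to just the targeted range... */
	vm_map_clip_start(map, entry, vm_map_trunc_page(start));
	vm_map_clip_end(map, entry, vm_map_round_page(end));

	/* ...then set the clipped range up for copy-on-write instead of
	 * marking the whole backing object "true_share". */
}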
13396 #endif /* !CONFIG_EMBEDDED */