osfmk/vm/vm_map.c

   1 /*
   2  * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * @OSF_COPYRIGHT@
  30  */
  31 /*
  32  * Mach Operating System
  33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
  34  * All Rights Reserved.
  35  *
  36  * Permission to use, copy, modify and distribute this software and its
  37  * documentation is hereby granted, provided that both the copyright
  38  * notice and this permission notice appear in all copies of the
  39  * software, derivative works or modified versions, and any portions
  40  * thereof, and that both notices appear in supporting documentation.
  41  *
  42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
  44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  45  *
  46  * Carnegie Mellon requests users of this software to return to
  47  *
  48  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  49  *  School of Computer Science
  50  *  Carnegie Mellon University
  51  *  Pittsburgh PA 15213-3890
  52  *
  53  * any improvements or extensions that they make and grant Carnegie Mellon
  54  * the rights to redistribute these changes.
  55  */
  56 /*
  57  */
  58 /*
  59  *      File:   vm/vm_map.c
  60  *      Author: Avadis Tevanian, Jr., Michael Wayne Young
  61  *      Date:   1985
  62  *
  63  *      Virtual memory mapping module.
  64  */
  65
  66 #include <task_swapper.h>
  67 #include <mach_assert.h>
  68 #include <libkern/OSAtomic.h>
  69
  70 #include <mach/kern_return.h>
  71 #include <mach/port.h>
  72 #include <mach/vm_attributes.h>
  73 #include <mach/vm_param.h>
  74 #include <mach/vm_behavior.h>
  75 #include <mach/vm_statistics.h>
  76 #include <mach/memory_object.h>
  77 #include <mach/mach_vm.h>
  78 #include <machine/cpu_capabilities.h>
  79 #include <mach/sdt.h>
  80
  81 #include <kern/assert.h>
  82 #include <kern/counters.h>
  83 #include <kern/kalloc.h>
  84 #include <kern/zalloc.h>
  85
  86 #include <vm/cpm.h>
  87 #include <vm/vm_init.h>
  88 #include <vm/vm_fault.h>
  89 #include <vm/vm_map.h>
  90 #include <vm/vm_object.h>
  91 #include <vm/vm_page.h>
  92 #include <vm/vm_pageout.h>
  93 #include <vm/vm_kern.h>
  94 #include <ipc/ipc_port.h>
  95 #include <kern/sched_prim.h>
  96 #include <kern/misc_protos.h>
  97 #include <machine/db_machdep.h>
  98 #include <kern/xpr.h>
  99
 100 #include <mach/vm_map_server.h>
 101 #include <mach/mach_host_server.h>
 102 #include <vm/vm_protos.h>
 103 #include <vm/vm_purgeable_internal.h>
 104
 105 #include <vm/vm_protos.h>
 106 #include <vm/vm_shared_region.h>
 107 #include <vm/vm_map_store.h>
 108
 109 /* Internal prototypes
 110  */
 111
 112 static void vm_map_simplify_range(
 113         vm_map_t        map,
 114         vm_map_offset_t start,
 115         vm_map_offset_t end);   /* forward */
 116
 117 static boolean_t        vm_map_range_check(
 118         vm_map_t        map,
 119         vm_map_offset_t start,
 120         vm_map_offset_t end,
 121         vm_map_entry_t  *entry);
 122
 123 static vm_map_entry_t   _vm_map_entry_create(
 124         struct vm_map_header    *map_header);
 125
 126 static void             _vm_map_entry_dispose(
 127         struct vm_map_header    *map_header,
 128         vm_map_entry_t          entry);
 129
 130 static void             vm_map_pmap_enter(
 131         vm_map_t                map,
 132         vm_map_offset_t         addr,
 133         vm_map_offset_t         end_addr,
 134         vm_object_t             object,
 135         vm_object_offset_t      offset,
 136         vm_prot_t               protection);
 137
 138 static void             _vm_map_clip_end(
 139         struct vm_map_header    *map_header,
 140         vm_map_entry_t          entry,
 141         vm_map_offset_t         end);
 142
 143 static void             _vm_map_clip_start(
 144         struct vm_map_header    *map_header,
 145         vm_map_entry_t          entry,
 146         vm_map_offset_t         start);
 147
 148 static void             vm_map_entry_delete(
 149         vm_map_t        map,
 150         vm_map_entry_t  entry);
 151
 152 static kern_return_t    vm_map_delete(
 153         vm_map_t        map,
 154         vm_map_offset_t start,
 155         vm_map_offset_t end,
 156         int             flags,
 157         vm_map_t        zap_map);
 158
 159 static kern_return_t    vm_map_copy_overwrite_unaligned(
 160         vm_map_t        dst_map,
 161         vm_map_entry_t  entry,
 162         vm_map_copy_t   copy,
 163         vm_map_address_t start);
 164
 165 static kern_return_t    vm_map_copy_overwrite_aligned(
 166         vm_map_t        dst_map,
 167         vm_map_entry_t  tmp_entry,
 168         vm_map_copy_t   copy,
 169         vm_map_offset_t start,
 170         pmap_t          pmap);
 171
 172 static kern_return_t    vm_map_copyin_kernel_buffer(
 173         vm_map_t        src_map,
 174         vm_map_address_t src_addr,
 175         vm_map_size_t   len,
 176         boolean_t       src_destroy,
 177         vm_map_copy_t   *copy_result);  /* OUT */
 178
 179 static kern_return_t    vm_map_copyout_kernel_buffer(
 180         vm_map_t        map,
 181         vm_map_address_t *addr, /* IN/OUT */
 182         vm_map_copy_t   copy,
 183         boolean_t       overwrite);
 184
 185 static void             vm_map_fork_share(
 186         vm_map_t        old_map,
 187         vm_map_entry_t  old_entry,
 188         vm_map_t        new_map);
 189
 190 static boolean_t        vm_map_fork_copy(
 191         vm_map_t        old_map,
 192         vm_map_entry_t  *old_entry_p,
 193         vm_map_t        new_map);
 194
 195 void            vm_map_region_top_walk(
 196         vm_map_entry_t             entry,
 197         vm_region_top_info_t       top);
 198
 199 void            vm_map_region_walk(
 200         vm_map_t                   map,
 201         vm_map_offset_t            va,
 202         vm_map_entry_t             entry,
 203         vm_object_offset_t         offset,
 204         vm_object_size_t           range,
 205         vm_region_extended_info_t  extended,
 206         boolean_t                  look_for_pages);
 207
 208 static kern_return_t    vm_map_wire_nested(
 209         vm_map_t                   map,
 210         vm_map_offset_t            start,
 211         vm_map_offset_t            end,
 212         vm_prot_t                  access_type,
 213         boolean_t                  user_wire,
 214         pmap_t                     map_pmap,
 215         vm_map_offset_t            pmap_addr);
 216
 217 static kern_return_t    vm_map_unwire_nested(
 218         vm_map_t                   map,
 219         vm_map_offset_t            start,
 220         vm_map_offset_t            end,
 221         boolean_t                  user_wire,
 222         pmap_t                     map_pmap,
 223         vm_map_offset_t            pmap_addr);
 224
 225 static kern_return_t    vm_map_overwrite_submap_recurse(
 226         vm_map_t                   dst_map,
 227         vm_map_offset_t            dst_addr,
 228         vm_map_size_t              dst_size);
 229
 230 static kern_return_t    vm_map_copy_overwrite_nested(
 231         vm_map_t                   dst_map,
 232         vm_map_offset_t            dst_addr,
 233         vm_map_copy_t              copy,
 234         boolean_t                  interruptible,
 235         pmap_t                     pmap,
 236         boolean_t                  discard_on_success);
 237
 238 static kern_return_t    vm_map_remap_extract(
 239         vm_map_t                map,
 240         vm_map_offset_t         addr,
 241         vm_map_size_t           size,
 242         boolean_t               copy,
 243         struct vm_map_header    *map_header,
 244         vm_prot_t               *cur_protection,
 245         vm_prot_t               *max_protection,
 246         vm_inherit_t            inheritance,
 247         boolean_t               pageable);
 248
 249 static kern_return_t    vm_map_remap_range_allocate(
 250         vm_map_t                map,
 251         vm_map_address_t        *address,
 252         vm_map_size_t           size,
 253         vm_map_offset_t         mask,
 254         int                     flags,
 255         vm_map_entry_t          *map_entry);
 256
 257 static void             vm_map_region_look_for_page(
 258         vm_map_t                   map,
 259         vm_map_offset_t            va,
 260         vm_object_t                object,
 261         vm_object_offset_t         offset,
 262         int                        max_refcnt,
 263         int                        depth,
 264         vm_region_extended_info_t  extended);
 265
 266 static int              vm_map_region_count_obj_refs(
 267         vm_map_entry_t             entry,
 268         vm_object_t                object);
 269
 270
 271 static kern_return_t    vm_map_willneed(
 272         vm_map_t        map,
 273         vm_map_offset_t start,
 274         vm_map_offset_t end);
 275
 276 static kern_return_t    vm_map_reuse_pages(
 277         vm_map_t        map,
 278         vm_map_offset_t start,
 279         vm_map_offset_t end);
 280
 281 static kern_return_t    vm_map_reusable_pages(
 282         vm_map_t        map,
 283         vm_map_offset_t start,
 284         vm_map_offset_t end);
 285
 286 static kern_return_t    vm_map_can_reuse(
 287         vm_map_t        map,
 288         vm_map_offset_t start,
 289         vm_map_offset_t end);
 290
 291 #if CONFIG_FREEZE
 292 struct default_freezer_table;
 293 __private_extern__ void* default_freezer_mapping_create(vm_object_t, vm_offset_t);
 294 __private_extern__ void  default_freezer_mapping_free(void**, boolean_t all);
 295 #endif
 296
 297 /*
 298  * Macros to copy a vm_map_entry. We must be careful to correctly
 299  * manage the wired page count. vm_map_entry_copy() creates a new
 300  * map entry to the same memory - the wired count in the new entry
 301  * must be set to zero. vm_map_entry_copy_full() creates a new
 302  * entry that is identical to the old entry.  This preserves the
 303  * wire count; it's used for map splitting and zone changing in
 304  * vm_map_copyout.
 305  */
 306 #define vm_map_entry_copy(NEW,OLD) \
 307 MACRO_BEGIN                                     \
 308         *(NEW) = *(OLD);                \
 309         (NEW)->is_shared = FALSE;       \
 310         (NEW)->needs_wakeup = FALSE;    \
 311         (NEW)->in_transition = FALSE;   \
 312         (NEW)->wired_count = 0;         \
 313         (NEW)->user_wired_count = 0;    \
 314         (NEW)->permanent = FALSE;       \
 315 MACRO_END
 316
 317 #define vm_map_entry_copy_full(NEW,OLD)        (*(NEW) = *(OLD))
 318
 319 /*
 320  *      Decide if we want to allow processes to execute from their data or stack areas.
 321  *      override_nx() returns true if we do.  Data/stack execution can be enabled independently
 322  *      for 32 and 64 bit processes.  Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
 323  *      or allow_stack_exec to enable data execution for that type of data area for that particular
 324  *      ABI (or both by or'ing the flags together).  These are initialized in the architecture
 325  *      specific pmap files since the default behavior varies according to architecture.  The
 326  *      main reason it varies is because of the need to provide binary compatibility with old
 327  *      applications that were written before these restrictions came into being.  In the old
 328  *      days, an app could execute anything it could read, but this has slowly been tightened
 329  *      up over time.  The default behavior is:
 330  *
 331  *      32-bit PPC apps         may execute from both stack and data areas
  332  *      32-bit Intel apps       may execute from data areas but not stack
 333  *      64-bit PPC/Intel apps   may not execute from either data or stack
 334  *
 335  *      An application on any architecture may override these defaults by explicitly
 336  *      adding PROT_EXEC permission to the page in question with the mprotect(2)
 337  *      system call.  This code here just determines what happens when an app tries to
 338  *      execute from a page that lacks execute permission.
 339  *
 340  *      Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
 341  *      default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
 342  *      a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
 343  *      execution from data areas for a particular binary even if the arch normally permits it. As
 344  *      a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
 345  *      to support some complicated use cases, notably browsers with out-of-process plugins that
 346  *      are not all NX-safe.
 347  */
 348
 349 extern int allow_data_exec, allow_stack_exec;
 350
 351 int
 352 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
 353 {
 354         int current_abi;
 355
 356         /*
 357          * Determine if the app is running in 32 or 64 bit mode.
 358          */
 359
 360         if (vm_map_is_64bit(map))
 361                 current_abi = VM_ABI_64;
 362         else
 363                 current_abi = VM_ABI_32;
 364
 365         /*
 366          * Determine if we should allow the execution based on whether it's a
 367          * stack or data area and the current architecture.
 368          */
 369
 370         if (user_tag == VM_MEMORY_STACK)
 371                 return allow_stack_exec & current_abi;
 372
 373         return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
 374 }
 375
 376
 377 /*
 378  *      Virtual memory maps provide for the mapping, protection,
 379  *      and sharing of virtual memory objects.  In addition,
 380  *      this module provides for an efficient virtual copy of
 381  *      memory from one map to another.
 382  *
 383  *      Synchronization is required prior to most operations.
 384  *
 385  *      Maps consist of an ordered doubly-linked list of simple
 386  *      entries; a single hint is used to speed up lookups.
 387  *
 388  *      Sharing maps have been deleted from this version of Mach.
 389  *      All shared objects are now mapped directly into the respective
 390  *      maps.  This requires a change in the copy on write strategy;
 391  *      the asymmetric (delayed) strategy is used for shared temporary
 392  *      objects instead of the symmetric (shadow) strategy.  All maps
 393  *      are now "top level" maps (either task map, kernel map or submap
 394  *      of the kernel map).
 395  *
  396  *      Since portions of maps are specified by start/end addresses,
 397  *      which may not align with existing map entries, all
 398  *      routines merely "clip" entries to these start/end values.
 399  *      [That is, an entry is split into two, bordering at a
 400  *      start or end value.]  Note that these clippings may not
 401  *      always be necessary (as the two resulting entries are then
 402  *      not changed); however, the clipping is done for convenience.
 403  *      No attempt is currently made to "glue back together" two
 404  *      abutting entries.
 405  *
 406  *      The symmetric (shadow) copy strategy implements virtual copy
 407  *      by copying VM object references from one map to
 408  *      another, and then marking both regions as copy-on-write.
 409  *      It is important to note that only one writeable reference
 410  *      to a VM object region exists in any map when this strategy
 411  *      is used -- this means that shadow object creation can be
  412  *      delayed until a write operation occurs.  The asymmetric (delayed)
 413  *      strategy allows multiple maps to have writeable references to
 414  *      the same region of a vm object, and hence cannot delay creating
 415  *      its copy objects.  See vm_object_copy_quickly() in vm_object.c.
 416  *      Copying of permanent objects is completely different; see
 417  *      vm_object_copy_strategically() in vm_object.c.
 418  */
 419
 420 static zone_t   vm_map_zone;            /* zone for vm_map structures */
 421 static zone_t   vm_map_entry_zone;      /* zone for vm_map_entry structures */
 422 static zone_t   vm_map_kentry_zone;     /* zone for kernel entry structures */
 423 static zone_t   vm_map_copy_zone;       /* zone for vm_map_copy structures */
 424
 425
 426 /*
 427  *      Placeholder object for submap operations.  This object is dropped
 428  *      into the range by a call to vm_map_find, and removed when
 429  *      vm_map_submap creates the submap.
 430  */
 431
 432 vm_object_t     vm_submap_object;
 433
 434 static void             *map_data;
 435 static vm_size_t        map_data_size;
 436 static void             *kentry_data;
 437 static vm_size_t        kentry_data_size;
 438 static int              kentry_count = 2048;            /* to init kentry_data_size */
 439
 440 #if CONFIG_EMBEDDED
 441 #define         NO_COALESCE_LIMIT  0
 442 #else
 443 #define         NO_COALESCE_LIMIT  ((1024 * 128) - 1)
 444 #endif
 445
 446 /* Skip acquiring locks if we're in the midst of a kernel core dump */
 447 unsigned int not_in_kdp = 1;
 448
 449 unsigned int vm_map_set_cache_attr_count = 0;
 450
 451 kern_return_t
 452 vm_map_set_cache_attr(
 453         vm_map_t        map,
 454         vm_map_offset_t va)
 455 {
 456         vm_map_entry_t  map_entry;
 457         vm_object_t     object;
 458         kern_return_t   kr = KERN_SUCCESS;
 459
 460         vm_map_lock_read(map);
 461
 462         if (!vm_map_lookup_entry(map, va, &map_entry) ||
 463             map_entry->is_sub_map) {
 464                 /*
 465                  * that memory is not properly mapped
 466                  */
 467                 kr = KERN_INVALID_ARGUMENT;
 468                 goto done;
 469         }
 470         object = map_entry->object.vm_object;
 471
 472         if (object == VM_OBJECT_NULL) {
 473                 /*
 474                  * there should be a VM object here at this point
 475                  */
 476                 kr = KERN_INVALID_ARGUMENT;
 477                 goto done;
 478         }
 479         vm_object_lock(object);
 480         object->set_cache_attr = TRUE;
 481         vm_object_unlock(object);
 482
 483         vm_map_set_cache_attr_count++;
 484 done:
 485         vm_map_unlock_read(map);
 486
 487         return kr;
 488 }
 489
 490
 491 #if CONFIG_CODE_DECRYPTION
 492 /*
 493  * vm_map_apple_protected:
 494  * This remaps the requested part of the object with an object backed by
 495  * the decrypting pager.
 496  * crypt_info contains entry points and session data for the crypt module.
 497  * The crypt_info block will be copied by vm_map_apple_protected. The data structures
 498  * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
 499  */
 500 kern_return_t
 501 vm_map_apple_protected(
 502         vm_map_t        map,
 503         vm_map_offset_t start,
 504         vm_map_offset_t end,
 505         struct pager_crypt_info *crypt_info)
 506 {
 507         boolean_t       map_locked;
 508         kern_return_t   kr;
 509         vm_map_entry_t  map_entry;
 510         memory_object_t protected_mem_obj;
 511         vm_object_t     protected_object;
 512         vm_map_offset_t map_addr;
 513
 514         vm_map_lock_read(map);
 515         map_locked = TRUE;
 516
 517         /* lookup the protected VM object */
 518         if (!vm_map_lookup_entry(map,
 519                                  start,
 520                                  &map_entry) ||
 521             map_entry->vme_end < end ||
 522             map_entry->is_sub_map) {
 523                 /* that memory is not properly mapped */
 524                 kr = KERN_INVALID_ARGUMENT;
 525                 goto done;
 526         }
 527         protected_object = map_entry->object.vm_object;
 528         if (protected_object == VM_OBJECT_NULL) {
 529                 /* there should be a VM object here at this point */
 530                 kr = KERN_INVALID_ARGUMENT;
 531                 goto done;
 532         }
 533
 534         /* make sure protected object stays alive while map is unlocked */
 535         vm_object_reference(protected_object);
 536
 537         vm_map_unlock_read(map);
 538         map_locked = FALSE;
 539
 540         /*
 541          * Lookup (and create if necessary) the protected memory object
 542          * matching that VM object.
 543          * If successful, this also grabs a reference on the memory object,
 544          * to guarantee that it doesn't go away before we get a chance to map
 545          * it.
 546          */
 547         protected_mem_obj = apple_protect_pager_setup(protected_object, crypt_info);
 548
 549         /* release extra ref on protected object */
 550         vm_object_deallocate(protected_object);
 551
 552         if (protected_mem_obj == NULL) {
 553                 kr = KERN_FAILURE;
 554                 goto done;
 555         }
 556
 557         /* map this memory object in place of the current one */
 558         map_addr = start;
 559         kr = vm_map_enter_mem_object(map,
 560                                      &map_addr,
 561                                      end - start,
 562                                      (mach_vm_offset_t) 0,
 563                                      VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
 564                                      (ipc_port_t) protected_mem_obj,
 565                                      (map_entry->offset +
 566                                       (start - map_entry->vme_start)),
 567                                      TRUE,
 568                                      map_entry->protection,
 569                                      map_entry->max_protection,
 570                                      map_entry->inheritance);
 571         assert(map_addr == start);
 572         /*
 573          * Release the reference obtained by apple_protect_pager_setup().
 574          * The mapping (if it succeeded) is now holding a reference on the
 575          * memory object.
 576          */
 577         memory_object_deallocate(protected_mem_obj);
 578
 579 done:
 580         if (map_locked) {
 581                 vm_map_unlock_read(map);
 582         }
 583         return kr;
 584 }
 585 #endif  /* CONFIG_CODE_DECRYPTION */
 586
 587
 588 lck_grp_t               vm_map_lck_grp;
 589 lck_grp_attr_t  vm_map_lck_grp_attr;
 590 lck_attr_t              vm_map_lck_attr;
 591
 592
 593 /*
 594  *      vm_map_init:
 595  *
 596  *      Initialize the vm_map module.  Must be called before
 597  *      any other vm_map routines.
 598  *
 599  *      Map and entry structures are allocated from zones -- we must
 600  *      initialize those zones.
 601  *
 602  *      There are three zones of interest:
 603  *
 604  *      vm_map_zone:            used to allocate maps.
 605  *      vm_map_entry_zone:      used to allocate map entries.
 606  *      vm_map_kentry_zone:     used to allocate map entries for the kernel.
 607  *
 608  *      The kernel allocates map entries from a special zone that is initially
 609  *      "crammed" with memory.  It would be difficult (perhaps impossible) for
 610  *      the kernel to allocate more memory to a entry zone when it became
 611  *      empty since the very act of allocating memory implies the creation
 612  *      of a new entry.
 613  */
 614 void
 615 vm_map_init(
 616         void)
 617 {
 618         vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
 619                             PAGE_SIZE, "maps");
 620         zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
 621
 622         vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
 623                                   1024*1024, PAGE_SIZE*5,
 624                                   "non-kernel map entries");
 625         zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
 626
 627         vm_map_kentry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
 628                                    kentry_data_size, kentry_data_size,
 629                                    "kernel map entries");
 630         zone_change(vm_map_kentry_zone, Z_NOENCRYPT, TRUE);
 631
 632         vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
 633                                  16*1024, PAGE_SIZE, "map copies");
 634         zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
 635
 636         /*
 637          *      Cram the map and kentry zones with initial data.
 638          *      Set kentry_zone non-collectible to aid zone_gc().
 639          */
 640         zone_change(vm_map_zone, Z_COLLECT, FALSE);
 641         zone_change(vm_map_kentry_zone, Z_COLLECT, FALSE);
 642         zone_change(vm_map_kentry_zone, Z_EXPAND, FALSE);
 643         zone_change(vm_map_kentry_zone, Z_FOREIGN, TRUE);
 644         zone_change(vm_map_kentry_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
 645         zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
 646
 647         zcram(vm_map_zone, map_data, map_data_size);
 648         zcram(vm_map_kentry_zone, kentry_data, kentry_data_size);
 649
 650         lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
 651         lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
 652         lck_attr_setdefault(&vm_map_lck_attr);
 653 }
 654
 655 void
 656 vm_map_steal_memory(
 657         void)
 658 {
 659         map_data_size = round_page(10 * sizeof(struct _vm_map));
 660         map_data = pmap_steal_memory(map_data_size);
 661
 662 #if 0
 663         /*
 664          * Limiting worst case: vm_map_kentry_zone needs to map each "available"
 665          * physical page (i.e. that beyond the kernel image and page tables)
 666          * individually; we guess at most one entry per eight pages in the
 667          * real world. This works out to roughly .1 of 1% of physical memory,
 668          * or roughly 1900 entries (64K) for a 64M machine with 4K pages.
 669          */
 670 #endif
 671         kentry_count = pmap_free_pages() / 8;
 672
 673
 674         kentry_data_size =
 675                 round_page(kentry_count * sizeof(struct vm_map_entry));
 676         kentry_data = pmap_steal_memory(kentry_data_size);
 677 }
 678
 679 /*
 680  *      vm_map_create:
 681  *
 682  *      Creates and returns a new empty VM map with
 683  *      the given physical map structure, and having
 684  *      the given lower and upper address bounds.
 685  */
 686 vm_map_t
 687 vm_map_create(
 688         pmap_t                  pmap,
 689         vm_map_offset_t min,
 690         vm_map_offset_t max,
 691         boolean_t               pageable)
 692 {
 693         static int              color_seed = 0;
 694         register vm_map_t       result;
 695
 696         result = (vm_map_t) zalloc(vm_map_zone);
 697         if (result == VM_MAP_NULL)
 698                 panic("vm_map_create");
 699
 700         vm_map_first_entry(result) = vm_map_to_entry(result);
 701         vm_map_last_entry(result)  = vm_map_to_entry(result);
 702         result->hdr.nentries = 0;
 703         result->hdr.entries_pageable = pageable;
 704
 705         vm_map_store_init( &(result->hdr) );
 706
 707         result->size = 0;
 708         result->user_wire_limit = MACH_VM_MAX_ADDRESS;  /* default limit is unlimited */
 709         result->user_wire_size  = 0;
 710         result->ref_count = 1;
 711 #if     TASK_SWAPPER
 712         result->res_count = 1;
 713         result->sw_state = MAP_SW_IN;
 714 #endif  /* TASK_SWAPPER */
 715         result->pmap = pmap;
 716         result->min_offset = min;
 717         result->max_offset = max;
 718         result->wiring_required = FALSE;
 719         result->no_zero_fill = FALSE;
 720         result->mapped = FALSE;
 721         result->wait_for_space = FALSE;
 722         result->switch_protect = FALSE;
 723         result->disable_vmentry_reuse = FALSE;
 724         result->map_disallow_data_exec = FALSE;
 725         result->highest_entry_end = 0;
 726         result->first_free = vm_map_to_entry(result);
 727         result->hint = vm_map_to_entry(result);
 728         result->color_rr = (color_seed++) & vm_color_mask;
 729         result->jit_entry_exists = FALSE;
 730 #if CONFIG_FREEZE
 731         result->default_freezer_toc = NULL;
 732 #endif
 733         vm_map_lock_init(result);
 734         lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
 735
 736         return(result);
 737 }
 738
 739 /*
 740  *      vm_map_entry_create:    [ internal use only ]
 741  *
 742  *      Allocates a VM map entry for insertion in the
 743  *      given map (or map copy).  No fields are filled.
 744  */
 745 #define vm_map_entry_create(map) \
 746         _vm_map_entry_create(&(map)->hdr)
 747
 748 #define vm_map_copy_entry_create(copy) \
 749         _vm_map_entry_create(&(copy)->cpy_hdr)
 750
 751 static vm_map_entry_t
 752 _vm_map_entry_create(
 753         register struct vm_map_header   *map_header)
 754 {
 755         register zone_t zone;
 756         register vm_map_entry_t entry;
 757
 758         if (map_header->entries_pageable)
 759                 zone = vm_map_entry_zone;
 760         else
 761                 zone = vm_map_kentry_zone;
 762
 763         entry = (vm_map_entry_t) zalloc(zone);
 764         if (entry == VM_MAP_ENTRY_NULL)
 765                 panic("vm_map_entry_create");
 766         vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
 767
 768         return(entry);
 769 }
 770
 771 /*
 772  *      vm_map_entry_dispose:   [ internal use only ]
 773  *
 774  *      Inverse of vm_map_entry_create.
 775  *
 776  *      write map lock held so no need to
 777  *      do anything special to insure correctness
 778  *      of the stores
 779  */
 780 #define vm_map_entry_dispose(map, entry)                        \
 781         vm_map_store_update( map, entry, VM_MAP_ENTRY_DELETE);  \
 782         _vm_map_entry_dispose(&(map)->hdr, (entry))
 783
 784 #define vm_map_copy_entry_dispose(map, entry) \
 785         _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
 786
 787 static void
 788 _vm_map_entry_dispose(
 789         register struct vm_map_header   *map_header,
 790         register vm_map_entry_t         entry)
 791 {
 792         register zone_t         zone;
 793
 794         if (map_header->entries_pageable)
 795                 zone = vm_map_entry_zone;
 796         else
 797                 zone = vm_map_kentry_zone;
 798
 799         zfree(zone, entry);
 800 }
 801
 802 #if MACH_ASSERT
 803 static boolean_t first_free_check = FALSE;
 804 boolean_t
 805 first_free_is_valid(
 806         vm_map_t        map)
 807 {
 808         if (!first_free_check)
 809                 return TRUE;
 810
 811         return( first_free_is_valid_store( map ));
 812 }
 813 #endif /* MACH_ASSERT */
 814
 815
 816 #define vm_map_copy_entry_link(copy, after_where, entry)                \
 817         _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
 818
 819 #define vm_map_copy_entry_unlink(copy, entry)                           \
 820         _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
 821
 822 #if     MACH_ASSERT && TASK_SWAPPER
 823 /*
 824  *      vm_map_res_reference:
 825  *
 826  *      Adds another valid residence count to the given map.
 827  *
 828  *      Map is locked so this function can be called from
 829  *      vm_map_swapin.
 830  *
 831  */
 832 void vm_map_res_reference(register vm_map_t map)
 833 {
 834         /* assert map is locked */
 835         assert(map->res_count >= 0);
 836         assert(map->ref_count >= map->res_count);
 837         if (map->res_count == 0) {
 838                 lck_mtx_unlock(&map->s_lock);
 839                 vm_map_lock(map);
 840                 vm_map_swapin(map);
 841                 lck_mtx_lock(&map->s_lock);
 842                 ++map->res_count;
 843                 vm_map_unlock(map);
 844         } else
 845                 ++map->res_count;
 846 }
 847
 848 /*
 849  *      vm_map_reference_swap:
 850  *
 851  *      Adds valid reference and residence counts to the given map.
 852  *
 853  *      The map may not be in memory (i.e. zero residence count).
 854  *
 855  */
 856 void vm_map_reference_swap(register vm_map_t map)
 857 {
 858         assert(map != VM_MAP_NULL);
 859         lck_mtx_lock(&map->s_lock);
 860         assert(map->res_count >= 0);
 861         assert(map->ref_count >= map->res_count);
 862         map->ref_count++;
 863         vm_map_res_reference(map);
 864         lck_mtx_unlock(&map->s_lock);
 865 }
 866
 867 /*
 868  *      vm_map_res_deallocate:
 869  *
 870  *      Decrement residence count on a map; possibly causing swapout.
 871  *
 872  *      The map must be in memory (i.e. non-zero residence count).
 873  *
 874  *      The map is locked, so this function is callable from vm_map_deallocate.
 875  *
 876  */
 877 void vm_map_res_deallocate(register vm_map_t map)
 878 {
 879         assert(map->res_count > 0);
 880         if (--map->res_count == 0) {
 881                 lck_mtx_unlock(&map->s_lock);
 882                 vm_map_lock(map);
 883                 vm_map_swapout(map);
 884                 vm_map_unlock(map);
 885                 lck_mtx_lock(&map->s_lock);
 886         }
 887         assert(map->ref_count >= map->res_count);
 888 }
 889 #endif  /* MACH_ASSERT && TASK_SWAPPER */
 890
 891 /*
 892  *      vm_map_destroy:
 893  *
 894  *      Actually destroy a map.
 895  */
 896 void
 897 vm_map_destroy(
 898         vm_map_t        map,
 899         int             flags)
 900 {
 901         vm_map_lock(map);
 902
 903         /* clean up regular map entries */
 904         (void) vm_map_delete(map, map->min_offset, map->max_offset,
 905                              flags, VM_MAP_NULL);
 906         /* clean up leftover special mappings (commpage, etc...) */
 907         (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
 908                              flags, VM_MAP_NULL);
 909
 910 #if CONFIG_FREEZE
 911         if (map->default_freezer_toc){
 912                 default_freezer_mapping_free( &(map->default_freezer_toc), TRUE);
 913         }
 914 #endif
 915         vm_map_unlock(map);
 916
 917         assert(map->hdr.nentries == 0);
 918
 919         if(map->pmap)
 920                 pmap_destroy(map->pmap);
 921
 922         zfree(vm_map_zone, map);
 923 }
 924
 925 #if     TASK_SWAPPER
 926 /*
 927  * vm_map_swapin/vm_map_swapout
 928  *
 929  * Swap a map in and out, either referencing or releasing its resources.
 930  * These functions are internal use only; however, they must be exported
 931  * because they may be called from macros, which are exported.
 932  *
 933  * In the case of swapout, there could be races on the residence count,
 934  * so if the residence count is up, we return, assuming that a
 935  * vm_map_deallocate() call in the near future will bring us back.
 936  *
 937  * Locking:
 938  *      -- We use the map write lock for synchronization among races.
 939  *      -- The map write lock, and not the simple s_lock, protects the
 940  *         swap state of the map.
 941  *      -- If a map entry is a share map, then we hold both locks, in
 942  *         hierarchical order.
 943  *
 944  * Synchronization Notes:
 945  *      1) If a vm_map_swapin() call happens while swapout in progress, it
 946  *      will block on the map lock and proceed when swapout is through.
 947  *      2) A vm_map_reference() call at this time is illegal, and will
 948  *      cause a panic.  vm_map_reference() is only allowed on resident
 949  *      maps, since it refuses to block.
 950  *      3) A vm_map_swapin() call during a swapin will block, and
 951  *      proceed when the first swapin is done, turning into a nop.
 952  *      This is the reason the res_count is not incremented until
 953  *      after the swapin is complete.
 954  *      4) There is a timing hole after the checks of the res_count, before
 955  *      the map lock is taken, during which a swapin may get the lock
 956  *      before a swapout about to happen.  If this happens, the swapin
 957  *      will detect the state and increment the reference count, causing
 958  *      the swapout to be a nop, thereby delaying it until a later
 959  *      vm_map_deallocate.  If the swapout gets the lock first, then
 960  *      the swapin will simply block until the swapout is done, and
 961  *      then proceed.
 962  *
 963  * Because vm_map_swapin() is potentially an expensive operation, it
 964  * should be used with caution.
 965  *
 966  * Invariants:
 967  *      1) A map with a residence count of zero is either swapped, or
 968  *         being swapped.
 969  *      2) A map with a non-zero residence count is either resident,
 970  *         or being swapped in.
 971  */
 972
 973 int vm_map_swap_enable = 1;
 974
 975 void vm_map_swapin (vm_map_t map)
 976 {
 977         register vm_map_entry_t entry;
 978
 979         if (!vm_map_swap_enable)        /* debug */
 980                 return;
 981
 982         /*
 983          * Map is locked
 984          * First deal with various races.
 985          */
 986         if (map->sw_state == MAP_SW_IN)
 987                 /*
 988                  * we raced with swapout and won.  Returning will incr.
 989                  * the res_count, turning the swapout into a nop.
 990                  */
 991                 return;
 992
 993         /*
 994          * The residence count must be zero.  If we raced with another
 995          * swapin, the state would have been IN; if we raced with a
 996          * swapout (after another competing swapin), we must have lost
 997          * the race to get here (see above comment), in which case
 998          * res_count is still 0.
 999          */
1000         assert(map->res_count == 0);
1001
1002         /*
1003          * There are no intermediate states of a map going out or
1004          * coming in, since the map is locked during the transition.
1005          */
1006         assert(map->sw_state == MAP_SW_OUT);
1007
1008         /*
1009          * We now operate upon each map entry.  If the entry is a sub-
1010          * or share-map, we call vm_map_res_reference upon it.
1011          * If the entry is an object, we call vm_object_res_reference
1012          * (this may iterate through the shadow chain).
1013          * Note that we hold the map locked the entire time,
1014          * even if we get back here via a recursive call in
1015          * vm_map_res_reference.
1016          */
1017         entry = vm_map_first_entry(map);
1018
1019         while (entry != vm_map_to_entry(map)) {
1020                 if (entry->object.vm_object != VM_OBJECT_NULL) {
1021                         if (entry->is_sub_map) {
1022                                 vm_map_t lmap = entry->object.sub_map;
1023                                 lck_mtx_lock(&lmap->s_lock);
1024                                 vm_map_res_reference(lmap);
1025                                 lck_mtx_unlock(&lmap->s_lock);
1026                         } else {
1027                                 vm_object_t object = entry->object.vm_object;
1028                                 vm_object_lock(object);
1029                                 /*
1030                                  * This call may iterate through the
1031                                  * shadow chain.
1032                                  */
1033                                 vm_object_res_reference(object);
1034                                 vm_object_unlock(object);
1035                         }
1036                 }
1037                 entry = entry->vme_next;
1038         }
1039         assert(map->sw_state == MAP_SW_OUT);
1040         map->sw_state = MAP_SW_IN;
1041 }
1042
1043 void vm_map_swapout(vm_map_t map)
1044 {
1045         register vm_map_entry_t entry;
1046
1047         /*
1048          * Map is locked
1049          * First deal with various races.
1050          * If we raced with a swapin and lost, the residence count
1051          * will have been incremented to 1, and we simply return.
1052          */
1053         lck_mtx_lock(&map->s_lock);
1054         if (map->res_count != 0) {
1055                 lck_mtx_unlock(&map->s_lock);
1056                 return;
1057         }
1058         lck_mtx_unlock(&map->s_lock);
1059
1060         /*
1061          * There are no intermediate states of a map going out or
1062          * coming in, since the map is locked during the transition.
1063          */
1064         assert(map->sw_state == MAP_SW_IN);
1065
1066         if (!vm_map_swap_enable)
1067                 return;
1068
1069         /*
1070          * We now operate upon each map entry.  If the entry is a sub-
1071          * or share-map, we call vm_map_res_deallocate upon it.
1072          * If the entry is an object, we call vm_object_res_deallocate
1073          * (this may iterate through the shadow chain).
1074          * Note that we hold the map locked the entire time,
1075          * even if we get back here via a recursive call in
1076          * vm_map_res_deallocate.
1077          */
1078         entry = vm_map_first_entry(map);
1079
1080         while (entry != vm_map_to_entry(map)) {
1081                 if (entry->object.vm_object != VM_OBJECT_NULL) {
1082                         if (entry->is_sub_map) {
1083                                 vm_map_t lmap = entry->object.sub_map;
1084                                 lck_mtx_lock(&lmap->s_lock);
1085                                 vm_map_res_deallocate(lmap);
1086                                 lck_mtx_unlock(&lmap->s_lock);
1087                         } else {
1088                                 vm_object_t object = entry->object.vm_object;
1089                                 vm_object_lock(object);
1090                                 /*
1091                                  * This call may take a long time,
1092                                  * since it could actively push
1093                                  * out pages (if we implement it
1094                                  * that way).
1095                                  */
1096                                 vm_object_res_deallocate(object);
1097                                 vm_object_unlock(object);
1098                         }
1099                 }
1100                 entry = entry->vme_next;
1101         }
1102         assert(map->sw_state == MAP_SW_IN);
1103         map->sw_state = MAP_SW_OUT;
1104 }
1105
1106 #endif  /* TASK_SWAPPER */
1107
1108 /*
1109  *      vm_map_lookup_entry:    [ internal use only ]
1110  *
1111  *      Calls into the vm map store layer to find the map
1112  *      entry containing (or immediately preceding) the
1113  *      specified address in the given map; the entry is returned
1114  *      in the "entry" parameter.  The boolean
1115  *      result indicates whether the address is
1116  *      actually contained in the map.
1117  */
1118 boolean_t
1119 vm_map_lookup_entry(
1120         register vm_map_t               map,
1121         register vm_map_offset_t        address,
1122         vm_map_entry_t          *entry)         /* OUT */
1123 {
1124         return ( vm_map_store_lookup_entry( map, address, entry ));
1125 }
1126
1127 /*
1128  *      Routine:        vm_map_find_space
1129  *      Purpose:
1130  *              Allocate a range in the specified virtual address map,
1131  *              returning the entry allocated for that range.
1132  *              Used by kmem_alloc, etc.
1133  *
1134  *              The map must be NOT be locked. It will be returned locked
1135  *              on KERN_SUCCESS, unlocked on failure.
1136  *
1137  *              If an entry is allocated, the object/offset fields
1138  *              are initialized to zero.
1139  */
1140 kern_return_t
1141 vm_map_find_space(
1142         register vm_map_t       map,
1143         vm_map_offset_t         *address,       /* OUT */
1144         vm_map_size_t           size,
1145         vm_map_offset_t         mask,
1146         int                     flags,
1147         vm_map_entry_t          *o_entry)       /* OUT */
1148 {
1149         register vm_map_entry_t entry, new_entry;
1150         register vm_map_offset_t        start;
1151         register vm_map_offset_t        end;
1152
1153         if (size == 0) {
1154                 *address = 0;
1155                 return KERN_INVALID_ARGUMENT;
1156         }
1157
1158         if (flags & VM_FLAGS_GUARD_AFTER) {
1159                 /* account for the back guard page in the size */
1160                 size += PAGE_SIZE_64;
1161         }
1162
1163         new_entry = vm_map_entry_create(map);
1164
1165         /*
1166          *      Look for the first possible address; if there's already
1167          *      something at this address, we have to start after it.
1168          */
1169
1170         vm_map_lock(map);
1171
1172         if( map->disable_vmentry_reuse == TRUE) {
1173                 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1174         } else {
1175                 assert(first_free_is_valid(map));
1176                 if ((entry = map->first_free) == vm_map_to_entry(map))
1177                         start = map->min_offset;
1178                 else
1179                         start = entry->vme_end;
1180         }
1181
1182         /*
1183          *      In any case, the "entry" always precedes
1184          *      the proposed new region throughout the loop:
1185          */
1186
1187         while (TRUE) {
1188                 register vm_map_entry_t next;
1189
1190                 /*
1191                  *      Find the end of the proposed new region.
1192                  *      Be sure we didn't go beyond the end, or
1193                  *      wrap around the address.
1194                  */
1195
1196                 if (flags & VM_FLAGS_GUARD_BEFORE) {
1197                         /* reserve space for the front guard page */
1198                         start += PAGE_SIZE_64;
1199                 }
1200                 end = ((start + mask) & ~mask);
1201
1202                 if (end < start) {
1203                         vm_map_entry_dispose(map, new_entry);
1204                         vm_map_unlock(map);
1205                         return(KERN_NO_SPACE);
1206                 }
1207                 start = end;
1208                 end += size;
1209
1210                 if ((end > map->max_offset) || (end < start)) {
1211                         vm_map_entry_dispose(map, new_entry);
1212                         vm_map_unlock(map);
1213                         return(KERN_NO_SPACE);
1214                 }
1215
1216                 /*
1217                  *      If there are no more entries, we must win.
1218                  */
1219
1220                 next = entry->vme_next;
1221                 if (next == vm_map_to_entry(map))
1222                         break;
1223
1224                 /*
1225                  *      If there is another entry, it must be
1226                  *      after the end of the potential new region.
1227                  */
1228
1229                 if (next->vme_start >= end)
1230                         break;
1231
1232                 /*
1233                  *      Didn't fit -- move to the next entry.
1234                  */
1235
1236                 entry = next;
1237                 start = entry->vme_end;
1238         }
1239
1240         /*
1241          *      At this point,
1242          *              "start" and "end" should define the endpoints of the
1243          *                      available new range, and
1244          *              "entry" should refer to the region before the new
1245          *                      range, and
1246          *
1247          *              the map should be locked.
1248          */
1249
1250         if (flags & VM_FLAGS_GUARD_BEFORE) {
1251                 /* go back for the front guard page */
1252                 start -= PAGE_SIZE_64;
1253         }
1254         *address = start;
1255
1256         new_entry->vme_start = start;
1257         new_entry->vme_end = end;
1258         assert(page_aligned(new_entry->vme_start));
1259         assert(page_aligned(new_entry->vme_end));
1260
1261         new_entry->is_shared = FALSE;
1262         new_entry->is_sub_map = FALSE;
1263         new_entry->use_pmap = FALSE;
1264         new_entry->object.vm_object = VM_OBJECT_NULL;
1265         new_entry->offset = (vm_object_offset_t) 0;
1266
1267         new_entry->needs_copy = FALSE;
1268
1269         new_entry->inheritance = VM_INHERIT_DEFAULT;
1270         new_entry->protection = VM_PROT_DEFAULT;
1271         new_entry->max_protection = VM_PROT_ALL;
1272         new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1273         new_entry->wired_count = 0;
1274         new_entry->user_wired_count = 0;
1275
1276         new_entry->in_transition = FALSE;
1277         new_entry->needs_wakeup = FALSE;
1278         new_entry->no_cache = FALSE;
1279         new_entry->permanent = FALSE;
1280         new_entry->superpage_size = 0;
1281
1282         new_entry->alias = 0;
1283         new_entry->zero_wired_pages = FALSE;
1284
1285         VM_GET_FLAGS_ALIAS(flags, new_entry->alias);
1286
1287         /*
1288          *      Insert the new entry into the list
1289          */
1290
1291         vm_map_store_entry_link(map, entry, new_entry);
1292
1293         map->size += size;
1294
1295         /*
1296          *      Update the lookup hint
1297          */
1298         SAVE_HINT_MAP_WRITE(map, new_entry);
1299
1300         *o_entry = new_entry;
1301         return(KERN_SUCCESS);
1302 }
1303
1304 int vm_map_pmap_enter_print = FALSE;
1305 int vm_map_pmap_enter_enable = FALSE;
1306
1307 /*
1308  *      Routine:        vm_map_pmap_enter [internal only]
1309  *
1310  *      Description:
1311  *              Force pages from the specified object to be entered into
1312  *              the pmap at the specified address if they are present.
1313  *              As soon as a page not found in the object the scan ends.
1314  *
1315  *      Returns:
1316  *              Nothing.
1317  *
1318  *      In/out conditions:
1319  *              The source map should not be locked on entry.
1320  */
1321 static void
1322 vm_map_pmap_enter(
1323         vm_map_t                map,
1324         register vm_map_offset_t        addr,
1325         register vm_map_offset_t        end_addr,
1326         register vm_object_t    object,
1327         vm_object_offset_t      offset,
1328         vm_prot_t               protection)
1329 {
1330         int                     type_of_fault;
1331         kern_return_t           kr;
1332
1333         if(map->pmap == 0)
1334                 return;
1335
1336         while (addr < end_addr) {
1337                 register vm_page_t      m;
1338
1339                 vm_object_lock(object);
1340
1341                 m = vm_page_lookup(object, offset);
1342                 /*
1343                  * ENCRYPTED SWAP:
1344                  * The user should never see encrypted data, so do not
1345                  * enter an encrypted page in the page table.
1346                  */
1347                 if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
1348                     m->fictitious ||
1349                     (m->unusual && ( m->error || m->restart || m->absent))) {
1350                         vm_object_unlock(object);
1351                         return;
1352                 }
1353
1354                 if (vm_map_pmap_enter_print) {
1355                         printf("vm_map_pmap_enter:");
1356                         printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1357                                map, (unsigned long long)addr, object, (unsigned long long)offset);
1358                 }
1359                 type_of_fault = DBG_CACHE_HIT_FAULT;
1360                 kr = vm_fault_enter(m, map->pmap, addr, protection, protection,
1361                                     VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
1362                                     &type_of_fault);
1363
1364                 vm_object_unlock(object);
1365
1366                 offset += PAGE_SIZE_64;
1367                 addr += PAGE_SIZE;
1368         }
1369 }
1370
1371 boolean_t vm_map_pmap_is_empty(
1372         vm_map_t        map,
1373         vm_map_offset_t start,
1374         vm_map_offset_t end);
1375 boolean_t vm_map_pmap_is_empty(
1376         vm_map_t        map,
1377         vm_map_offset_t start,
1378         vm_map_offset_t end)
1379 {
1380 #ifdef MACHINE_PMAP_IS_EMPTY
1381         return pmap_is_empty(map->pmap, start, end);
1382 #else   /* MACHINE_PMAP_IS_EMPTY */
1383         vm_map_offset_t offset;
1384         ppnum_t         phys_page;
1385
1386         if (map->pmap == NULL) {
1387                 return TRUE;
1388         }
1389
1390         for (offset = start;
1391              offset < end;
1392              offset += PAGE_SIZE) {
1393                 phys_page = pmap_find_phys(map->pmap, offset);
1394                 if (phys_page) {
1395                         kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1396                                 "page %d at 0x%llx\n",
1397                                 map, (long long)start, (long long)end,
1398                                 phys_page, (long long)offset);
1399                         return FALSE;
1400                 }
1401         }
1402         return TRUE;
1403 #endif  /* MACHINE_PMAP_IS_EMPTY */
1404 }
1405
1406 /*
1407  *      Routine:        vm_map_enter
1408  *
1409  *      Description:
1410  *              Allocate a range in the specified virtual address map.
1411  *              The resulting range will refer to memory defined by
1412  *              the given memory object and offset into that object.
1413  *
1414  *              Arguments are as defined in the vm_map call.
1415  */
1416 int _map_enter_debug = 0;
1417 static unsigned int vm_map_enter_restore_successes = 0;
1418 static unsigned int vm_map_enter_restore_failures = 0;
1419 kern_return_t
1420 vm_map_enter(
1421         vm_map_t                map,
1422         vm_map_offset_t         *address,       /* IN/OUT */
1423         vm_map_size_t           size,
1424         vm_map_offset_t         mask,
1425         int                     flags,
1426         vm_object_t             object,
1427         vm_object_offset_t      offset,
1428         boolean_t               needs_copy,
1429         vm_prot_t               cur_protection,
1430         vm_prot_t               max_protection,
1431         vm_inherit_t            inheritance)
1432 {
1433         vm_map_entry_t          entry, new_entry;
1434         vm_map_offset_t         start, tmp_start, tmp_offset;
1435         vm_map_offset_t         end, tmp_end;
1436         vm_map_offset_t         tmp2_start, tmp2_end;
1437         vm_map_offset_t         step;
1438         kern_return_t           result = KERN_SUCCESS;
1439         vm_map_t                zap_old_map = VM_MAP_NULL;
1440         vm_map_t                zap_new_map = VM_MAP_NULL;
1441         boolean_t               map_locked = FALSE;
1442         boolean_t               pmap_empty = TRUE;
1443         boolean_t               new_mapping_established = FALSE;
1444         boolean_t               anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1445         boolean_t               purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1446         boolean_t               overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
1447         boolean_t               no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
1448         boolean_t               is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
1449         boolean_t               permanent = ((flags & VM_FLAGS_PERMANENT) != 0);
1450         unsigned int            superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
1451         char                    alias;
1452         vm_map_offset_t         effective_min_offset, effective_max_offset;
1453         kern_return_t           kr;
1454
1455         if (superpage_size) {
1456                 switch (superpage_size) {
1457                         /*
1458                          * Note that the current implementation only supports
1459                          * a single size for superpages, SUPERPAGE_SIZE, per
1460                          * architecture. As soon as more sizes are supposed
1461                          * to be supported, SUPERPAGE_SIZE has to be replaced
1462                          * with a lookup of the size depending on superpage_size.
1463                          */
1464 #ifdef __x86_64__
1465                         case SUPERPAGE_SIZE_ANY:
1466                                 /* handle it like 2 MB and round up to page size */
1467                                 size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
1468                         case SUPERPAGE_SIZE_2MB:
1469                                 break;
1470 #endif
1471                         default:
1472                                 return KERN_INVALID_ARGUMENT;
1473                 }
1474                 mask = SUPERPAGE_SIZE-1;
1475                 if (size & (SUPERPAGE_SIZE-1))
1476                         return KERN_INVALID_ARGUMENT;
1477                 inheritance = VM_INHERIT_NONE;  /* fork() children won't inherit superpages */
1478         }
1479
1480
1481 #if CONFIG_EMBEDDED
1482         if (cur_protection & VM_PROT_WRITE){
1483                 if ((cur_protection & VM_PROT_EXECUTE) && !(flags & VM_FLAGS_MAP_JIT)){
1484                         printf("EMBEDDED: %s curprot cannot be write+execute. turning off execute\n", __PRETTY_FUNCTION__);
1485                         cur_protection &= ~VM_PROT_EXECUTE;
1486                 }
1487         }
1488 #endif /* CONFIG_EMBEDDED */
1489
1490         if (is_submap) {
1491                 if (purgable) {
1492                         /* submaps can not be purgeable */
1493                         return KERN_INVALID_ARGUMENT;
1494                 }
1495                 if (object == VM_OBJECT_NULL) {
1496                         /* submaps can not be created lazily */
1497                         return KERN_INVALID_ARGUMENT;
1498                 }
1499         }
1500         if (flags & VM_FLAGS_ALREADY) {
1501                 /*
1502                  * VM_FLAGS_ALREADY says that it's OK if the same mapping
1503                  * is already present.  For it to be meaningful, the requested
1504                  * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
1505                  * we shouldn't try and remove what was mapped there first
1506                  * (!VM_FLAGS_OVERWRITE).
1507                  */
1508                 if ((flags & VM_FLAGS_ANYWHERE) ||
1509                     (flags & VM_FLAGS_OVERWRITE)) {
1510                         return KERN_INVALID_ARGUMENT;
1511                 }
1512         }
1513
1514         effective_min_offset = map->min_offset;
1515
1516         if (flags & VM_FLAGS_BEYOND_MAX) {
1517                 /*
1518                  * Allow an insertion beyond the map's max offset.
1519                  */
1520                 if (vm_map_is_64bit(map))
1521                         effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
1522                 else
1523                         effective_max_offset = 0x00000000FFFFF000ULL;
1524         } else {
1525                 effective_max_offset = map->max_offset;
1526         }
1527
1528         if (size == 0 ||
1529             (offset & PAGE_MASK_64) != 0) {
1530                 *address = 0;
1531                 return KERN_INVALID_ARGUMENT;
1532         }
1533
1534         VM_GET_FLAGS_ALIAS(flags, alias);
1535
1536 #define RETURN(value)   { result = value; goto BailOut; }
1537
1538         assert(page_aligned(*address));
1539         assert(page_aligned(size));
1540
1541         /*
1542          * Only zero-fill objects are allowed to be purgable.
1543          * LP64todo - limit purgable objects to 32-bits for now
1544          */
1545         if (purgable &&
1546             (offset != 0 ||
1547              (object != VM_OBJECT_NULL &&
1548               (object->vo_size != size ||
1549                object->purgable == VM_PURGABLE_DENY))
1550              || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
1551                 return KERN_INVALID_ARGUMENT;
1552
1553         if (!anywhere && overwrite) {
1554                 /*
1555                  * Create a temporary VM map to hold the old mappings in the
1556                  * affected area while we create the new one.
1557                  * This avoids releasing the VM map lock in
1558                  * vm_map_entry_delete() and allows atomicity
1559                  * when we want to replace some mappings with a new one.
1560                  * It also allows us to restore the old VM mappings if the
1561                  * new mapping fails.
1562                  */
1563                 zap_old_map = vm_map_create(PMAP_NULL,
1564                                             *address,
1565                                             *address + size,
1566                                             map->hdr.entries_pageable);
1567         }
1568
1569 StartAgain: ;
1570
1571         start = *address;
1572
1573         if (anywhere) {
1574                 vm_map_lock(map);
1575                 map_locked = TRUE;
1576
1577                 if ((flags & VM_FLAGS_MAP_JIT) && (map->jit_entry_exists)){
1578                         result = KERN_INVALID_ARGUMENT;
1579                         goto BailOut;
1580                 }
1581
1582                 /*
1583                  *      Calculate the first possible address.
1584                  */
1585
1586                 if (start < effective_min_offset)
1587                         start = effective_min_offset;
1588                 if (start > effective_max_offset)
1589                         RETURN(KERN_NO_SPACE);
1590
1591                 /*
1592                  *      Look for the first possible address;
1593                  *      if there's already something at this
1594                  *      address, we have to start after it.
1595                  */
1596
1597                 if( map->disable_vmentry_reuse == TRUE) {
1598                         VM_MAP_HIGHEST_ENTRY(map, entry, start);
1599                 } else {
1600                         assert(first_free_is_valid(map));
1601
1602                         entry = map->first_free;
1603
1604                         if (entry == vm_map_to_entry(map)) {
1605                                 entry = NULL;
1606                         } else {
1607                                if (entry->vme_next == vm_map_to_entry(map)){
1608                                        /*
1609                                         * Hole at the end of the map.
1610                                         */
1611                                         entry = NULL;
1612                                } else {
1613                                         if (start < (entry->vme_next)->vme_start ) {
1614                                                 start = entry->vme_end;
1615                                         } else {
1616                                                 /*
1617                                                  * Need to do a lookup.
1618                                                  */
1619                                                 entry = NULL;
1620                                         }
1621                                }
1622                         }
1623
1624                         if (entry == NULL) {
1625                                 vm_map_entry_t  tmp_entry;
1626                                 if (vm_map_lookup_entry(map, start, &tmp_entry))
1627                                         start = tmp_entry->vme_end;
1628                                 entry = tmp_entry;
1629                         }
1630                 }
1631
1632                 /*
1633                  *      In any case, the "entry" always precedes
1634                  *      the proposed new region throughout the
1635                  *      loop:
1636                  */
1637
1638                 while (TRUE) {
1639                         register vm_map_entry_t next;
1640
1641                         /*
1642                          *      Find the end of the proposed new region.
1643                          *      Be sure we didn't go beyond the end, or
1644                          *      wrap around the address.
1645                          */
1646
1647                         end = ((start + mask) & ~mask);
1648                         if (end < start)
1649                                 RETURN(KERN_NO_SPACE);
1650                         start = end;
1651                         end += size;
1652
1653                         if ((end > effective_max_offset) || (end < start)) {
1654                                 if (map->wait_for_space) {
1655                                         if (size <= (effective_max_offset -
1656                                                      effective_min_offset)) {
1657                                                 assert_wait((event_t)map,
1658                                                             THREAD_ABORTSAFE);
1659                                                 vm_map_unlock(map);
1660                                                 map_locked = FALSE;
1661                                                 thread_block(THREAD_CONTINUE_NULL);
1662                                                 goto StartAgain;
1663                                         }
1664                                 }
1665                                 RETURN(KERN_NO_SPACE);
1666                         }
1667
1668                         /*
1669                          *      If there are no more entries, we must win.
1670                          */
1671
1672                         next = entry->vme_next;
1673                         if (next == vm_map_to_entry(map))
1674                                 break;
1675
1676                         /*
1677                          *      If there is another entry, it must be
1678                          *      after the end of the potential new region.
1679                          */
1680
1681                         if (next->vme_start >= end)
1682                                 break;
1683
1684                         /*
1685                          *      Didn't fit -- move to the next entry.
1686                          */
1687
1688                         entry = next;
1689                         start = entry->vme_end;
1690                 }
1691                 *address = start;
1692         } else {
1693                 /*
1694                  *      Verify that:
1695                  *              the address doesn't itself violate
1696                  *              the mask requirement.
1697                  */
1698
1699                 vm_map_lock(map);
1700                 map_locked = TRUE;
1701                 if ((start & mask) != 0)
1702                         RETURN(KERN_NO_SPACE);
1703
1704                 /*
1705                  *      ...     the address is within bounds
1706                  */
1707
1708                 end = start + size;
1709
1710                 if ((start < effective_min_offset) ||
1711                     (end > effective_max_offset) ||
1712                     (start >= end)) {
1713                         RETURN(KERN_INVALID_ADDRESS);
1714                 }
1715
1716                 if (overwrite && zap_old_map != VM_MAP_NULL) {
1717                         /*
1718                          * Fixed mapping and "overwrite" flag: attempt to
1719                          * remove all existing mappings in the specified
1720                          * address range, saving them in our "zap_old_map".
1721                          */
1722                         (void) vm_map_delete(map, start, end,
1723                                              VM_MAP_REMOVE_SAVE_ENTRIES,
1724                                              zap_old_map);
1725                 }
1726
1727                 /*
1728                  *      ...     the starting address isn't allocated
1729                  */
1730
1731                 if (vm_map_lookup_entry(map, start, &entry)) {
1732                         if (! (flags & VM_FLAGS_ALREADY)) {
1733                                 RETURN(KERN_NO_SPACE);
1734                         }
1735                         /*
1736                          * Check if what's already there is what we want.
1737                          */
1738                         tmp_start = start;
1739                         tmp_offset = offset;
1740                         if (entry->vme_start < start) {
1741                                 tmp_start -= start - entry->vme_start;
1742                                 tmp_offset -= start - entry->vme_start;
1743
1744                         }
1745                         for (; entry->vme_start < end;
1746                              entry = entry->vme_next) {
1747                                 /*
1748                                  * Check if the mapping's attributes
1749                                  * match the existing map entry.
1750                                  */
1751                                 if (entry == vm_map_to_entry(map) ||
1752                                     entry->vme_start != tmp_start ||
1753                                     entry->is_sub_map != is_submap ||
1754                                     entry->offset != tmp_offset ||
1755                                     entry->needs_copy != needs_copy ||
1756                                     entry->protection != cur_protection ||
1757                                     entry->max_protection != max_protection ||
1758                                     entry->inheritance != inheritance ||
1759                                     entry->alias != alias) {
1760                                         /* not the same mapping ! */
1761                                         RETURN(KERN_NO_SPACE);
1762                                 }
1763                                 /*
1764                                  * Check if the same object is being mapped.
1765                                  */
1766                                 if (is_submap) {
1767                                         if (entry->object.sub_map !=
1768                                             (vm_map_t) object) {
1769                                                 /* not the same submap */
1770                                                 RETURN(KERN_NO_SPACE);
1771                                         }
1772                                 } else {
1773                                         if (entry->object.vm_object != object) {
1774                                                 /* not the same VM object... */
1775                                                 vm_object_t obj2;
1776
1777                                                 obj2 = entry->object.vm_object;
1778                                                 if ((obj2 == VM_OBJECT_NULL ||
1779                                                      obj2->internal) &&
1780                                                     (object == VM_OBJECT_NULL ||
1781                                                      object->internal)) {
1782                                                         /*
1783                                                          * ... but both are
1784                                                          * anonymous memory,
1785                                                          * so equivalent.
1786                                                          */
1787                                                 } else {
1788                                                         RETURN(KERN_NO_SPACE);
1789                                                 }
1790                                         }
1791                                 }
1792
1793                                 tmp_offset += entry->vme_end - entry->vme_start;
1794                                 tmp_start += entry->vme_end - entry->vme_start;
1795                                 if (entry->vme_end >= end) {
1796                                         /* reached the end of our mapping */
1797                                         break;
1798                                 }
1799                         }
1800                         /* it all matches:  let's use what's already there ! */
1801                         RETURN(KERN_MEMORY_PRESENT);
1802                 }
1803
1804                 /*
1805                  *      ...     the next region doesn't overlap the
1806                  *              end point.
1807                  */
1808
1809                 if ((entry->vme_next != vm_map_to_entry(map)) &&
1810                     (entry->vme_next->vme_start < end))
1811                         RETURN(KERN_NO_SPACE);
1812         }
1813
1814         /*
1815          *      At this point,
1816          *              "start" and "end" should define the endpoints of the
1817          *                      available new range, and
1818          *              "entry" should refer to the region before the new
1819          *                      range, and
1820          *
1821          *              the map should be locked.
1822          */
1823
1824         /*
1825          *      See whether we can avoid creating a new entry (and object) by
1826          *      extending one of our neighbors.  [So far, we only attempt to
1827          *      extend from below.]  Note that we can never extend/join
1828          *      purgable objects because they need to remain distinct
1829          *      entities in order to implement their "volatile object"
1830          *      semantics.
1831          */
1832
1833         if (purgable) {
1834                 if (object == VM_OBJECT_NULL) {
1835                         object = vm_object_allocate(size);
1836                         object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
1837                         object->purgable = VM_PURGABLE_NONVOLATILE;
1838                         offset = (vm_object_offset_t)0;
1839                 }
1840         } else if ((is_submap == FALSE) &&
1841                    (object == VM_OBJECT_NULL) &&
1842                    (entry != vm_map_to_entry(map)) &&
1843                    (entry->vme_end == start) &&
1844                    (!entry->is_shared) &&
1845                    (!entry->is_sub_map) &&
1846                    ((alias == VM_MEMORY_REALLOC) || (entry->alias == alias)) &&
1847                    (entry->inheritance == inheritance) &&
1848                    (entry->protection == cur_protection) &&
1849                    (entry->max_protection == max_protection) &&
1850                    (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
1851                    (entry->in_transition == 0) &&
1852                    (entry->no_cache == no_cache) &&
1853                    ((entry->vme_end - entry->vme_start) + size <=
1854                     (alias == VM_MEMORY_REALLOC ?
1855                      ANON_CHUNK_SIZE :
1856                      NO_COALESCE_LIMIT)) &&
1857                    (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
1858                 if (vm_object_coalesce(entry->object.vm_object,
1859                                        VM_OBJECT_NULL,
1860                                        entry->offset,
1861                                        (vm_object_offset_t) 0,
1862                                        (vm_map_size_t)(entry->vme_end - entry->vme_start),
1863                                        (vm_map_size_t)(end - entry->vme_end))) {
1864
1865                         /*
1866                          *      Coalesced the two objects - can extend
1867                          *      the previous map entry to include the
1868                          *      new range.
1869                          */
1870                         map->size += (end - entry->vme_end);
1871                         entry->vme_end = end;
1872                         vm_map_store_update_first_free(map, map->first_free);
1873                         RETURN(KERN_SUCCESS);
1874                 }
1875         }
1876
1877         step = superpage_size ? SUPERPAGE_SIZE : (end - start);
1878         new_entry = NULL;
1879
1880         for (tmp2_start = start; tmp2_start<end; tmp2_start += step) {
1881                 tmp2_end = tmp2_start + step;
1882                 /*
1883                  *      Create a new entry
1884                  *      LP64todo - for now, we can only allocate 4GB internal objects
1885                  *      because the default pager can't page bigger ones.  Remove this
1886                  *      when it can.
1887                  *
1888                  * XXX FBDP
1889                  * The reserved "page zero" in each process's address space can
1890                  * be arbitrarily large.  Splitting it into separate 4GB objects and
1891                  * therefore different VM map entries serves no purpose and just
1892                  * slows down operations on the VM map, so let's not split the
1893                  * allocation into 4GB chunks if the max protection is NONE.  That
1894                  * memory should never be accessible, so it will never get to the
1895                  * default pager.
1896                  */
1897                 tmp_start = tmp2_start;
1898                 if (object == VM_OBJECT_NULL &&
1899                     size > (vm_map_size_t)ANON_CHUNK_SIZE &&
1900                     max_protection != VM_PROT_NONE &&
1901                     superpage_size == 0)
1902                         tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE;
1903                 else
1904                         tmp_end = tmp2_end;
1905                 do {
1906                         new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
1907                                                         object, offset, needs_copy,
1908                                                         FALSE, FALSE,
1909                                                         cur_protection, max_protection,
1910                                                         VM_BEHAVIOR_DEFAULT,
1911                                                         (flags & VM_FLAGS_MAP_JIT)? VM_INHERIT_NONE: inheritance,
1912                                                         0, no_cache,
1913                                                         permanent, superpage_size);
1914                         new_entry->alias = alias;
1915                         if (flags & VM_FLAGS_MAP_JIT){
1916                                 if (!(map->jit_entry_exists)){
1917                                         new_entry->used_for_jit = TRUE;
1918                                         map->jit_entry_exists = TRUE;
1919                                 }
1920                         }
1921
1922                         if (is_submap) {
1923                                 vm_map_t        submap;
1924                                 boolean_t       submap_is_64bit;
1925                                 boolean_t       use_pmap;
1926
1927                                 new_entry->is_sub_map = TRUE;
1928                                 submap = (vm_map_t) object;
1929                                 submap_is_64bit = vm_map_is_64bit(submap);
1930                                 use_pmap = (alias == VM_MEMORY_SHARED_PMAP);
1931         #ifndef NO_NESTED_PMAP
1932                                 if (use_pmap && submap->pmap == NULL) {
1933                                         /* we need a sub pmap to nest... */
1934                                         submap->pmap = pmap_create(0, submap_is_64bit);
1935                                         if (submap->pmap == NULL) {
1936                                                 /* let's proceed without nesting... */
1937                                         }
1938                                 }
1939                                 if (use_pmap && submap->pmap != NULL) {
1940                                         kr = pmap_nest(map->pmap,
1941                                                        submap->pmap,
1942                                                        tmp_start,
1943                                                        tmp_start,
1944                                                        tmp_end - tmp_start);
1945                                         if (kr != KERN_SUCCESS) {
1946                                                 printf("vm_map_enter: "
1947                                                        "pmap_nest(0x%llx,0x%llx) "
1948                                                        "error 0x%x\n",
1949                                                        (long long)tmp_start,
1950                                                        (long long)tmp_end,
1951                                                        kr);
1952                                         } else {
1953                                                 /* we're now nested ! */
1954                                                 new_entry->use_pmap = TRUE;
1955                                                 pmap_empty = FALSE;
1956                                         }
1957                                 }
1958         #endif /* NO_NESTED_PMAP */
1959                         }
1960                         entry = new_entry;
1961
1962                         if (superpage_size) {
1963                                 vm_page_t pages, m;
1964                                 vm_object_t sp_object;
1965
1966                                 entry->offset = 0;
1967
1968                                 /* allocate one superpage */
1969                                 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
1970                                 if (kr != KERN_SUCCESS) {
1971                                         new_mapping_established = TRUE; /* will cause deallocation of whole range */
1972                                         RETURN(kr);
1973                                 }
1974
1975                                 /* create one vm_object per superpage */
1976                                 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
1977                                 sp_object->phys_contiguous = TRUE;
1978                                 sp_object->vo_shadow_offset = (vm_object_offset_t)pages->phys_page*PAGE_SIZE;
1979                                 entry->object.vm_object = sp_object;
1980
1981                                 /* enter the base pages into the object */
1982                                 vm_object_lock(sp_object);
1983                                 for (offset = 0; offset < SUPERPAGE_SIZE; offset += PAGE_SIZE) {
1984                                         m = pages;
1985                                         pmap_zero_page(m->phys_page);
1986                                         pages = NEXT_PAGE(m);
1987                                         *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
1988                                         vm_page_insert(m, sp_object, offset);
1989                                 }
1990                                 vm_object_unlock(sp_object);
1991                         }
1992                 } while (tmp_end != tmp2_end &&
1993                          (tmp_start = tmp_end) &&
1994                          (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ?
1995                           tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end));
1996         }
1997
1998         vm_map_unlock(map);
1999         map_locked = FALSE;
2000
2001         new_mapping_established = TRUE;
2002
2003         /*      Wire down the new entry if the user
2004          *      requested all new map entries be wired.
2005          */
2006         if ((map->wiring_required)||(superpage_size)) {
2007                 pmap_empty = FALSE; /* pmap won't be empty */
2008                 result = vm_map_wire(map, start, end,
2009                                      new_entry->protection, TRUE);
2010                 RETURN(result);
2011         }
2012
2013         if ((object != VM_OBJECT_NULL) &&
2014             (vm_map_pmap_enter_enable) &&
2015             (!anywhere)  &&
2016             (!needs_copy) &&
2017             (size < (128*1024))) {
2018                 pmap_empty = FALSE; /* pmap won't be empty */
2019
2020                 if (override_nx(map, alias) && cur_protection)
2021                         cur_protection |= VM_PROT_EXECUTE;
2022
2023                 vm_map_pmap_enter(map, start, end,
2024                                   object, offset, cur_protection);
2025         }
2026
2027 BailOut: ;
2028         if (result == KERN_SUCCESS) {
2029                 vm_prot_t pager_prot;
2030                 memory_object_t pager;
2031
2032                 if (pmap_empty &&
2033                     !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
2034                         assert(vm_map_pmap_is_empty(map,
2035                                                     *address,
2036                                                     *address+size));
2037                 }
2038
2039                 /*
2040                  * For "named" VM objects, let the pager know that the
2041                  * memory object is being mapped.  Some pagers need to keep
2042                  * track of this, to know when they can reclaim the memory
2043                  * object, for example.
2044                  * VM calls memory_object_map() for each mapping (specifying
2045                  * the protection of each mapping) and calls
2046                  * memory_object_last_unmap() when all the mappings are gone.
2047                  */
2048                 pager_prot = max_protection;
2049                 if (needs_copy) {
2050                         /*
2051                          * Copy-On-Write mapping: won't modify
2052                          * the memory object.
2053                          */
2054                         pager_prot &= ~VM_PROT_WRITE;
2055                 }
2056                 if (!is_submap &&
2057                     object != VM_OBJECT_NULL &&
2058                     object->named &&
2059                     object->pager != MEMORY_OBJECT_NULL) {
2060                         vm_object_lock(object);
2061                         pager = object->pager;
2062                         if (object->named &&
2063                             pager != MEMORY_OBJECT_NULL) {
2064                                 assert(object->pager_ready);
2065                                 vm_object_mapping_wait(object, THREAD_UNINT);
2066                                 vm_object_mapping_begin(object);
2067                                 vm_object_unlock(object);
2068
2069                                 kr = memory_object_map(pager, pager_prot);
2070                                 assert(kr == KERN_SUCCESS);
2071
2072                                 vm_object_lock(object);
2073                                 vm_object_mapping_end(object);
2074                         }
2075                         vm_object_unlock(object);
2076                 }
2077         } else {
2078                 if (new_mapping_established) {
2079                         /*
2080                          * We have to get rid of the new mappings since we
2081                          * won't make them available to the user.
2082                          * Try and do that atomically, to minimize the risk
2083                          * that someone else creates new mappings in that range.
2084                          */
2085                         zap_new_map = vm_map_create(PMAP_NULL,
2086                                                     *address,
2087                                                     *address + size,
2088                                                     map->hdr.entries_pageable);
2089                         if (!map_locked) {
2090                                 vm_map_lock(map);
2091                                 map_locked = TRUE;
2092                         }
2093                         (void) vm_map_delete(map, *address, *address+size,
2094                                              VM_MAP_REMOVE_SAVE_ENTRIES,
2095                                              zap_new_map);
2096                 }
2097                 if (zap_old_map != VM_MAP_NULL &&
2098                     zap_old_map->hdr.nentries != 0) {
2099                         vm_map_entry_t  entry1, entry2;
2100
2101                         /*
2102                          * The new mapping failed.  Attempt to restore
2103                          * the old mappings, saved in the "zap_old_map".
2104                          */
2105                         if (!map_locked) {
2106                                 vm_map_lock(map);
2107                                 map_locked = TRUE;
2108                         }
2109
2110                         /* first check if the coast is still clear */
2111                         start = vm_map_first_entry(zap_old_map)->vme_start;
2112                         end = vm_map_last_entry(zap_old_map)->vme_end;
2113                         if (vm_map_lookup_entry(map, start, &entry1) ||
2114                             vm_map_lookup_entry(map, end, &entry2) ||
2115                             entry1 != entry2) {
2116                                 /*
2117                                  * Part of that range has already been
2118                                  * re-mapped:  we can't restore the old
2119                                  * mappings...
2120                                  */
2121                                 vm_map_enter_restore_failures++;
2122                         } else {
2123                                 /*
2124                                  * Transfer the saved map entries from
2125                                  * "zap_old_map" to the original "map",
2126                                  * inserting them all after "entry1".
2127                                  */
2128                                 for (entry2 = vm_map_first_entry(zap_old_map);
2129                                      entry2 != vm_map_to_entry(zap_old_map);
2130                                      entry2 = vm_map_first_entry(zap_old_map)) {
2131                                         vm_map_size_t entry_size;
2132
2133                                         entry_size = (entry2->vme_end -
2134                                                       entry2->vme_start);
2135                                         vm_map_store_entry_unlink(zap_old_map,
2136                                                             entry2);
2137                                         zap_old_map->size -= entry_size;
2138                                         vm_map_store_entry_link(map, entry1, entry2);
2139                                         map->size += entry_size;
2140                                         entry1 = entry2;
2141                                 }
2142                                 if (map->wiring_required) {
2143                                         /*
2144                                          * XXX TODO: we should rewire the
2145                                          * old pages here...
2146                                          */
2147                                 }
2148                                 vm_map_enter_restore_successes++;
2149                         }
2150                 }
2151         }
2152
2153         if (map_locked) {
2154                 vm_map_unlock(map);
2155         }
2156
2157         /*
2158          * Get rid of the "zap_maps" and all the map entries that
2159          * they may still contain.
2160          */
2161         if (zap_old_map != VM_MAP_NULL) {
2162                 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2163                 zap_old_map = VM_MAP_NULL;
2164         }
2165         if (zap_new_map != VM_MAP_NULL) {
2166                 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2167                 zap_new_map = VM_MAP_NULL;
2168         }
2169
2170         return result;
2171
2172 #undef  RETURN
2173 }
2174
2175 kern_return_t
2176 vm_map_enter_mem_object(
2177         vm_map_t                target_map,
2178         vm_map_offset_t         *address,
2179         vm_map_size_t           initial_size,
2180         vm_map_offset_t         mask,
2181         int                     flags,
2182         ipc_port_t              port,
2183         vm_object_offset_t      offset,
2184         boolean_t               copy,
2185         vm_prot_t               cur_protection,
2186         vm_prot_t               max_protection,
2187         vm_inherit_t            inheritance)
2188 {
2189         vm_map_address_t        map_addr;
2190         vm_map_size_t           map_size;
2191         vm_object_t             object;
2192         vm_object_size_t        size;
2193         kern_return_t           result;
2194         boolean_t               mask_cur_protection, mask_max_protection;
2195
2196         mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
2197         mask_max_protection = max_protection & VM_PROT_IS_MASK;
2198         cur_protection &= ~VM_PROT_IS_MASK;
2199         max_protection &= ~VM_PROT_IS_MASK;
2200
2201         /*
2202          * Check arguments for validity
2203          */
2204         if ((target_map == VM_MAP_NULL) ||
2205             (cur_protection & ~VM_PROT_ALL) ||
2206             (max_protection & ~VM_PROT_ALL) ||
2207             (inheritance > VM_INHERIT_LAST_VALID) ||
2208             initial_size == 0)
2209                 return KERN_INVALID_ARGUMENT;
2210
2211         map_addr = vm_map_trunc_page(*address);
2212         map_size = vm_map_round_page(initial_size);
2213         size = vm_object_round_page(initial_size);
2214
2215         /*
2216          * Find the vm object (if any) corresponding to this port.
2217          */
2218         if (!IP_VALID(port)) {
2219                 object = VM_OBJECT_NULL;
2220                 offset = 0;
2221                 copy = FALSE;
2222         } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
2223                 vm_named_entry_t        named_entry;
2224
2225                 named_entry = (vm_named_entry_t) port->ip_kobject;
2226                 /* a few checks to make sure user is obeying rules */
2227                 if (size == 0) {
2228                         if (offset >= named_entry->size)
2229                                 return KERN_INVALID_RIGHT;
2230                         size = named_entry->size - offset;
2231                 }
2232                 if (mask_max_protection) {
2233                         max_protection &= named_entry->protection;
2234                 }
2235                 if (mask_cur_protection) {
2236                         cur_protection &= named_entry->protection;
2237                 }
2238                 if ((named_entry->protection & max_protection) !=
2239                     max_protection)
2240                         return KERN_INVALID_RIGHT;
2241                 if ((named_entry->protection & cur_protection) !=
2242                     cur_protection)
2243                         return KERN_INVALID_RIGHT;
2244                 if (named_entry->size < (offset + size))
2245                         return KERN_INVALID_ARGUMENT;
2246
2247                 /* the callers parameter offset is defined to be the */
2248                 /* offset from beginning of named entry offset in object */
2249                 offset = offset + named_entry->offset;
2250
2251                 named_entry_lock(named_entry);
2252                 if (named_entry->is_sub_map) {
2253                         vm_map_t                submap;
2254
2255                         submap = named_entry->backing.map;
2256                         vm_map_lock(submap);
2257                         vm_map_reference(submap);
2258                         vm_map_unlock(submap);
2259                         named_entry_unlock(named_entry);
2260
2261                         result = vm_map_enter(target_map,
2262                                               &map_addr,
2263                                               map_size,
2264                                               mask,
2265                                               flags | VM_FLAGS_SUBMAP,
2266                                               (vm_object_t) submap,
2267                                               offset,
2268                                               copy,
2269                                               cur_protection,
2270                                               max_protection,
2271                                               inheritance);
2272                         if (result != KERN_SUCCESS) {
2273                                 vm_map_deallocate(submap);
2274                         } else {
2275                                 /*
2276                                  * No need to lock "submap" just to check its
2277                                  * "mapped" flag: that flag is never reset
2278                                  * once it's been set and if we race, we'll
2279                                  * just end up setting it twice, which is OK.
2280                                  */
2281                                 if (submap->mapped == FALSE) {
2282                                         /*
2283                                          * This submap has never been mapped.
2284                                          * Set its "mapped" flag now that it
2285                                          * has been mapped.
2286                                          * This happens only for the first ever
2287                                          * mapping of a "submap".
2288                                          */
2289                                         vm_map_lock(submap);
2290                                         submap->mapped = TRUE;
2291                                         vm_map_unlock(submap);
2292                                 }
2293                                 *address = map_addr;
2294                         }
2295                         return result;
2296
2297                 } else if (named_entry->is_pager) {
2298                         unsigned int    access;
2299                         vm_prot_t       protections;
2300                         unsigned int    wimg_mode;
2301
2302                         protections = named_entry->protection & VM_PROT_ALL;
2303                         access = GET_MAP_MEM(named_entry->protection);
2304
2305                         object = vm_object_enter(named_entry->backing.pager,
2306                                                  named_entry->size,
2307                                                  named_entry->internal,
2308                                                  FALSE,
2309                                                  FALSE);
2310                         if (object == VM_OBJECT_NULL) {
2311                                 named_entry_unlock(named_entry);
2312                                 return KERN_INVALID_OBJECT;
2313                         }
2314
2315                         /* JMM - drop reference on pager here */
2316
2317                         /* create an extra ref for the named entry */
2318                         vm_object_lock(object);
2319                         vm_object_reference_locked(object);
2320                         named_entry->backing.object = object;
2321                         named_entry->is_pager = FALSE;
2322                         named_entry_unlock(named_entry);
2323
2324                         wimg_mode = object->wimg_bits;
2325
2326                         if (access == MAP_MEM_IO) {
2327                                 wimg_mode = VM_WIMG_IO;
2328                         } else if (access == MAP_MEM_COPYBACK) {
2329                                 wimg_mode = VM_WIMG_USE_DEFAULT;
2330                         } else if (access == MAP_MEM_WTHRU) {
2331                                 wimg_mode = VM_WIMG_WTHRU;
2332                         } else if (access == MAP_MEM_WCOMB) {
2333                                 wimg_mode = VM_WIMG_WCOMB;
2334                         }
2335
2336                         /* wait for object (if any) to be ready */
2337                         if (!named_entry->internal) {
2338                                 while (!object->pager_ready) {
2339                                         vm_object_wait(
2340                                                 object,
2341                                                 VM_OBJECT_EVENT_PAGER_READY,
2342                                                 THREAD_UNINT);
2343                                         vm_object_lock(object);
2344                                 }
2345                         }
2346
2347                         if (object->wimg_bits != wimg_mode)
2348                                 vm_object_change_wimg_mode(object, wimg_mode);
2349
2350                         object->true_share = TRUE;
2351
2352                         if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
2353                                 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
2354                         vm_object_unlock(object);
2355                 } else {
2356                         /* This is the case where we are going to map */
2357                         /* an already mapped object.  If the object is */
2358                         /* not ready it is internal.  An external     */
2359                         /* object cannot be mapped until it is ready  */
2360                         /* we can therefore avoid the ready check     */
2361                         /* in this case.  */
2362                         object = named_entry->backing.object;
2363                         assert(object != VM_OBJECT_NULL);
2364                         named_entry_unlock(named_entry);
2365                         vm_object_reference(object);
2366                 }
2367         } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
2368                 /*
2369                  * JMM - This is temporary until we unify named entries
2370                  * and raw memory objects.
2371                  *
2372                  * Detected fake ip_kotype for a memory object.  In
2373                  * this case, the port isn't really a port at all, but
2374                  * instead is just a raw memory object.
2375                  */
2376
2377                 object = vm_object_enter((memory_object_t)port,
2378                                          size, FALSE, FALSE, FALSE);
2379                 if (object == VM_OBJECT_NULL)
2380                         return KERN_INVALID_OBJECT;
2381
2382                 /* wait for object (if any) to be ready */
2383                 if (object != VM_OBJECT_NULL) {
2384                         if (object == kernel_object) {
2385                                 printf("Warning: Attempt to map kernel object"
2386                                         " by a non-private kernel entity\n");
2387                                 return KERN_INVALID_OBJECT;
2388                         }
2389                         if (!object->pager_ready) {
2390                                 vm_object_lock(object);
2391
2392                                 while (!object->pager_ready) {
2393                                         vm_object_wait(object,
2394                                                        VM_OBJECT_EVENT_PAGER_READY,
2395                                                        THREAD_UNINT);
2396                                         vm_object_lock(object);
2397                                 }
2398                                 vm_object_unlock(object);
2399                         }
2400                 }
2401         } else {
2402                 return KERN_INVALID_OBJECT;
2403         }
2404
2405         if (object != VM_OBJECT_NULL &&
2406             object->named &&
2407             object->pager != MEMORY_OBJECT_NULL &&
2408             object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2409                 memory_object_t pager;
2410                 vm_prot_t       pager_prot;
2411                 kern_return_t   kr;
2412
2413                 /*
2414                  * For "named" VM objects, let the pager know that the
2415                  * memory object is being mapped.  Some pagers need to keep
2416                  * track of this, to know when they can reclaim the memory
2417                  * object, for example.
2418                  * VM calls memory_object_map() for each mapping (specifying
2419                  * the protection of each mapping) and calls
2420                  * memory_object_last_unmap() when all the mappings are gone.
2421                  */
2422                 pager_prot = max_protection;
2423                 if (copy) {
2424                         /*
2425                          * Copy-On-Write mapping: won't modify the
2426                          * memory object.
2427                          */
2428                         pager_prot &= ~VM_PROT_WRITE;
2429                 }
2430                 vm_object_lock(object);
2431                 pager = object->pager;
2432                 if (object->named &&
2433                     pager != MEMORY_OBJECT_NULL &&
2434                     object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2435                         assert(object->pager_ready);
2436                         vm_object_mapping_wait(object, THREAD_UNINT);
2437                         vm_object_mapping_begin(object);
2438                         vm_object_unlock(object);
2439
2440                         kr = memory_object_map(pager, pager_prot);
2441                         assert(kr == KERN_SUCCESS);
2442
2443                         vm_object_lock(object);
2444                         vm_object_mapping_end(object);
2445                 }
2446                 vm_object_unlock(object);
2447         }
2448
2449         /*
2450          *      Perform the copy if requested
2451          */
2452
2453         if (copy) {
2454                 vm_object_t             new_object;
2455                 vm_object_offset_t      new_offset;
2456
2457                 result = vm_object_copy_strategically(object, offset, size,
2458                                                       &new_object, &new_offset,
2459                                                       &copy);
2460
2461
2462                 if (result == KERN_MEMORY_RESTART_COPY) {
2463                         boolean_t success;
2464                         boolean_t src_needs_copy;
2465
2466                         /*
2467                          * XXX
2468                          * We currently ignore src_needs_copy.
2469                          * This really is the issue of how to make
2470                          * MEMORY_OBJECT_COPY_SYMMETRIC safe for
2471                          * non-kernel users to use. Solution forthcoming.
2472                          * In the meantime, since we don't allow non-kernel
2473                          * memory managers to specify symmetric copy,
2474                          * we won't run into problems here.
2475                          */
2476                         new_object = object;
2477                         new_offset = offset;
2478                         success = vm_object_copy_quickly(&new_object,
2479                                                          new_offset, size,
2480                                                          &src_needs_copy,
2481                                                          &copy);
2482                         assert(success);
2483                         result = KERN_SUCCESS;
2484                 }
2485                 /*
2486                  *      Throw away the reference to the
2487                  *      original object, as it won't be mapped.
2488                  */
2489
2490                 vm_object_deallocate(object);
2491
2492                 if (result != KERN_SUCCESS)
2493                         return result;
2494
2495                 object = new_object;
2496                 offset = new_offset;
2497         }
2498
2499         result = vm_map_enter(target_map,
2500                               &map_addr, map_size,
2501                               (vm_map_offset_t)mask,
2502                               flags,
2503                               object, offset,
2504                               copy,
2505                               cur_protection, max_protection, inheritance);
2506         if (result != KERN_SUCCESS)
2507                 vm_object_deallocate(object);
2508         *address = map_addr;
2509         return result;
2510 }
2511
2512
2513
2514
2515 kern_return_t
2516 vm_map_enter_mem_object_control(
2517         vm_map_t                target_map,
2518         vm_map_offset_t         *address,
2519         vm_map_size_t           initial_size,
2520         vm_map_offset_t         mask,
2521         int                     flags,
2522         memory_object_control_t control,
2523         vm_object_offset_t      offset,
2524         boolean_t               copy,
2525         vm_prot_t               cur_protection,
2526         vm_prot_t               max_protection,
2527         vm_inherit_t            inheritance)
2528 {
2529         vm_map_address_t        map_addr;
2530         vm_map_size_t           map_size;
2531         vm_object_t             object;
2532         vm_object_size_t        size;
2533         kern_return_t           result;
2534         memory_object_t         pager;
2535         vm_prot_t               pager_prot;
2536         kern_return_t           kr;
2537
2538         /*
2539          * Check arguments for validity
2540          */
2541         if ((target_map == VM_MAP_NULL) ||
2542             (cur_protection & ~VM_PROT_ALL) ||
2543             (max_protection & ~VM_PROT_ALL) ||
2544             (inheritance > VM_INHERIT_LAST_VALID) ||
2545             initial_size == 0)
2546                 return KERN_INVALID_ARGUMENT;
2547
2548         map_addr = vm_map_trunc_page(*address);
2549         map_size = vm_map_round_page(initial_size);
2550         size = vm_object_round_page(initial_size);
2551
2552         object = memory_object_control_to_vm_object(control);
2553
2554         if (object == VM_OBJECT_NULL)
2555                 return KERN_INVALID_OBJECT;
2556
2557         if (object == kernel_object) {
2558                 printf("Warning: Attempt to map kernel object"
2559                        " by a non-private kernel entity\n");
2560                 return KERN_INVALID_OBJECT;
2561         }
2562
2563         vm_object_lock(object);
2564         object->ref_count++;
2565         vm_object_res_reference(object);
2566
2567         /*
2568          * For "named" VM objects, let the pager know that the
2569          * memory object is being mapped.  Some pagers need to keep
2570          * track of this, to know when they can reclaim the memory
2571          * object, for example.
2572          * VM calls memory_object_map() for each mapping (specifying
2573          * the protection of each mapping) and calls
2574          * memory_object_last_unmap() when all the mappings are gone.
2575          */
2576         pager_prot = max_protection;
2577         if (copy) {
2578                 pager_prot &= ~VM_PROT_WRITE;
2579         }
2580         pager = object->pager;
2581         if (object->named &&
2582             pager != MEMORY_OBJECT_NULL &&
2583             object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2584                 assert(object->pager_ready);
2585                 vm_object_mapping_wait(object, THREAD_UNINT);
2586                 vm_object_mapping_begin(object);
2587                 vm_object_unlock(object);
2588
2589                 kr = memory_object_map(pager, pager_prot);
2590                 assert(kr == KERN_SUCCESS);
2591
2592                 vm_object_lock(object);
2593                 vm_object_mapping_end(object);
2594         }
2595         vm_object_unlock(object);
2596
2597         /*
2598          *      Perform the copy if requested
2599          */
2600
2601         if (copy) {
2602                 vm_object_t             new_object;
2603                 vm_object_offset_t      new_offset;
2604
2605                 result = vm_object_copy_strategically(object, offset, size,
2606                                                       &new_object, &new_offset,
2607                                                       &copy);
2608
2609
2610                 if (result == KERN_MEMORY_RESTART_COPY) {
2611                         boolean_t success;
2612                         boolean_t src_needs_copy;
2613
2614                         /*
2615                          * XXX
2616                          * We currently ignore src_needs_copy.
2617                          * This really is the issue of how to make
2618                          * MEMORY_OBJECT_COPY_SYMMETRIC safe for
2619                          * non-kernel users to use. Solution forthcoming.
2620                          * In the meantime, since we don't allow non-kernel
2621                          * memory managers to specify symmetric copy,
2622                          * we won't run into problems here.
2623                          */
2624                         new_object = object;
2625                         new_offset = offset;
2626                         success = vm_object_copy_quickly(&new_object,
2627                                                          new_offset, size,
2628                                                          &src_needs_copy,
2629                                                          &copy);
2630                         assert(success);
2631                         result = KERN_SUCCESS;
2632                 }
2633                 /*
2634                  *      Throw away the reference to the
2635                  *      original object, as it won't be mapped.
2636                  */
2637
2638                 vm_object_deallocate(object);
2639
2640                 if (result != KERN_SUCCESS)
2641                         return result;
2642
2643                 object = new_object;
2644                 offset = new_offset;
2645         }
2646
2647         result = vm_map_enter(target_map,
2648                               &map_addr, map_size,
2649                               (vm_map_offset_t)mask,
2650                               flags,
2651                               object, offset,
2652                               copy,
2653                               cur_protection, max_protection, inheritance);
2654         if (result != KERN_SUCCESS)
2655                 vm_object_deallocate(object);
2656         *address = map_addr;
2657
2658         return result;
2659 }
2660
2661
2662 #if     VM_CPM
2663
2664 #ifdef MACH_ASSERT
2665 extern pmap_paddr_t     avail_start, avail_end;
2666 #endif
2667
2668 /*
2669  *      Allocate memory in the specified map, with the caveat that
2670  *      the memory is physically contiguous.  This call may fail
2671  *      if the system can't find sufficient contiguous memory.
2672  *      This call may cause or lead to heart-stopping amounts of
2673  *      paging activity.
2674  *
2675  *      Memory obtained from this call should be freed in the
2676  *      normal way, viz., via vm_deallocate.
2677  */
2678 kern_return_t
2679 vm_map_enter_cpm(
2680         vm_map_t                map,
2681         vm_map_offset_t *addr,
2682         vm_map_size_t           size,
2683         int                     flags)
2684 {
2685         vm_object_t             cpm_obj;
2686         pmap_t                  pmap;
2687         vm_page_t               m, pages;
2688         kern_return_t           kr;
2689         vm_map_offset_t         va, start, end, offset;
2690 #if     MACH_ASSERT
2691         vm_map_offset_t         prev_addr;
2692 #endif  /* MACH_ASSERT */
2693
2694         boolean_t               anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
2695
2696         if (!vm_allocate_cpm_enabled)
2697                 return KERN_FAILURE;
2698
2699         if (size == 0) {
2700                 *addr = 0;
2701                 return KERN_SUCCESS;
2702         }
2703         if (anywhere)
2704                 *addr = vm_map_min(map);
2705         else
2706                 *addr = vm_map_trunc_page(*addr);
2707         size = vm_map_round_page(size);
2708
2709         /*
2710          * LP64todo - cpm_allocate should probably allow
2711          * allocations of >4GB, but not with the current
2712          * algorithm, so just cast down the size for now.
2713          */
2714         if (size > VM_MAX_ADDRESS)
2715                 return KERN_RESOURCE_SHORTAGE;
2716         if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
2717                                &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)
2718                 return kr;
2719
2720         cpm_obj = vm_object_allocate((vm_object_size_t)size);
2721         assert(cpm_obj != VM_OBJECT_NULL);
2722         assert(cpm_obj->internal);
2723         assert(cpm_obj->size == (vm_object_size_t)size);
2724         assert(cpm_obj->can_persist == FALSE);
2725         assert(cpm_obj->pager_created == FALSE);
2726         assert(cpm_obj->pageout == FALSE);
2727         assert(cpm_obj->shadow == VM_OBJECT_NULL);
2728
2729         /*
2730          *      Insert pages into object.
2731          */
2732
2733         vm_object_lock(cpm_obj);
2734         for (offset = 0; offset < size; offset += PAGE_SIZE) {
2735                 m = pages;
2736                 pages = NEXT_PAGE(m);
2737                 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2738
2739                 assert(!m->gobbled);
2740                 assert(!m->wanted);
2741                 assert(!m->pageout);
2742                 assert(!m->tabled);
2743                 assert(VM_PAGE_WIRED(m));
2744                 /*
2745                  * ENCRYPTED SWAP:
2746                  * "m" is not supposed to be pageable, so it
2747                  * should not be encrypted.  It wouldn't be safe
2748                  * to enter it in a new VM object while encrypted.
2749                  */
2750                 ASSERT_PAGE_DECRYPTED(m);
2751                 assert(m->busy);
2752                 assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT));
2753
2754                 m->busy = FALSE;
2755                 vm_page_insert(m, cpm_obj, offset);
2756         }
2757         assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
2758         vm_object_unlock(cpm_obj);
2759
2760         /*
2761          *      Hang onto a reference on the object in case a
2762          *      multi-threaded application for some reason decides
2763          *      to deallocate the portion of the address space into
2764          *      which we will insert this object.
2765          *
2766          *      Unfortunately, we must insert the object now before
2767          *      we can talk to the pmap module about which addresses
2768          *      must be wired down.  Hence, the race with a multi-
2769          *      threaded app.
2770          */
2771         vm_object_reference(cpm_obj);
2772
2773         /*
2774          *      Insert object into map.
2775          */
2776
2777         kr = vm_map_enter(
2778                 map,
2779                 addr,
2780                 size,
2781                 (vm_map_offset_t)0,
2782                 flags,
2783                 cpm_obj,
2784                 (vm_object_offset_t)0,
2785                 FALSE,
2786                 VM_PROT_ALL,
2787                 VM_PROT_ALL,
2788                 VM_INHERIT_DEFAULT);
2789
2790         if (kr != KERN_SUCCESS) {
2791                 /*
2792                  *      A CPM object doesn't have can_persist set,
2793                  *      so all we have to do is deallocate it to
2794                  *      free up these pages.
2795                  */
2796                 assert(cpm_obj->pager_created == FALSE);
2797                 assert(cpm_obj->can_persist == FALSE);
2798                 assert(cpm_obj->pageout == FALSE);
2799                 assert(cpm_obj->shadow == VM_OBJECT_NULL);
2800                 vm_object_deallocate(cpm_obj); /* kill acquired ref */
2801                 vm_object_deallocate(cpm_obj); /* kill creation ref */
2802         }
2803
2804         /*
2805          *      Inform the physical mapping system that the
2806          *      range of addresses may not fault, so that
2807          *      page tables and such can be locked down as well.
2808          */
2809         start = *addr;
2810         end = start + size;
2811         pmap = vm_map_pmap(map);
2812         pmap_pageable(pmap, start, end, FALSE);
2813
2814         /*
2815          *      Enter each page into the pmap, to avoid faults.
2816          *      Note that this loop could be coded more efficiently,
2817          *      if the need arose, rather than looking up each page
2818          *      again.
2819          */
2820         for (offset = 0, va = start; offset < size;
2821              va += PAGE_SIZE, offset += PAGE_SIZE) {
2822                 int type_of_fault;
2823
2824                 vm_object_lock(cpm_obj);
2825                 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2826                 assert(m != VM_PAGE_NULL);
2827
2828                 vm_page_zero_fill(m);
2829
2830                 type_of_fault = DBG_ZERO_FILL_FAULT;
2831
2832                 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
2833                                VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
2834                                &type_of_fault);
2835
2836                 vm_object_unlock(cpm_obj);
2837         }
2838
2839 #if     MACH_ASSERT
2840         /*
2841          *      Verify ordering in address space.
2842          */
2843         for (offset = 0; offset < size; offset += PAGE_SIZE) {
2844                 vm_object_lock(cpm_obj);
2845                 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2846                 vm_object_unlock(cpm_obj);
2847                 if (m == VM_PAGE_NULL)
2848                         panic("vm_allocate_cpm:  obj 0x%x off 0x%x no page",
2849                               cpm_obj, offset);
2850                 assert(m->tabled);
2851                 assert(!m->busy);
2852                 assert(!m->wanted);
2853                 assert(!m->fictitious);
2854                 assert(!m->private);
2855                 assert(!m->absent);
2856                 assert(!m->error);
2857                 assert(!m->cleaning);
2858                 assert(!m->precious);
2859                 assert(!m->clustered);
2860                 if (offset != 0) {
2861                         if (m->phys_page != prev_addr + 1) {
2862                                 printf("start 0x%x end 0x%x va 0x%x\n",
2863                                        start, end, va);
2864                                 printf("obj 0x%x off 0x%x\n", cpm_obj, offset);
2865                                 printf("m 0x%x prev_address 0x%x\n", m,
2866                                        prev_addr);
2867                                 panic("vm_allocate_cpm:  pages not contig!");
2868                         }
2869                 }
2870                 prev_addr = m->phys_page;
2871         }
2872 #endif  /* MACH_ASSERT */
2873
2874         vm_object_deallocate(cpm_obj); /* kill extra ref */
2875
2876         return kr;
2877 }
2878
2879
2880 #else   /* VM_CPM */
2881
2882 /*
2883  *      Interface is defined in all cases, but unless the kernel
2884  *      is built explicitly for this option, the interface does
2885  *      nothing.
2886  */
2887
2888 kern_return_t
2889 vm_map_enter_cpm(
2890         __unused vm_map_t       map,
2891         __unused vm_map_offset_t        *addr,
2892         __unused vm_map_size_t  size,
2893         __unused int            flags)
2894 {
2895         return KERN_FAILURE;
2896 }
2897 #endif /* VM_CPM */
2898
2899 /* Not used without nested pmaps */
2900 #ifndef NO_NESTED_PMAP
2901 /*
2902  * Clip and unnest a portion of a nested submap mapping.
2903  */
2904
2905
2906 static void
2907 vm_map_clip_unnest(
2908         vm_map_t        map,
2909         vm_map_entry_t  entry,
2910         vm_map_offset_t start_unnest,
2911         vm_map_offset_t end_unnest)
2912 {
2913         vm_map_offset_t old_start_unnest = start_unnest;
2914         vm_map_offset_t old_end_unnest = end_unnest;
2915
2916         assert(entry->is_sub_map);
2917         assert(entry->object.sub_map != NULL);
2918
2919         /*
2920          * Query the platform for the optimal unnest range.
2921          * DRK: There's some duplication of effort here, since
2922          * callers may have adjusted the range to some extent. This
2923          * routine was introduced to support 1GiB subtree nesting
2924          * for x86 platforms, which can also nest on 2MiB boundaries
2925          * depending on size/alignment.
2926          */
2927         if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
2928                 log_unnest_badness(map, old_start_unnest, old_end_unnest);
2929         }
2930
2931         if (entry->vme_start > start_unnest ||
2932             entry->vme_end < end_unnest) {
2933                 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
2934                       "bad nested entry: start=0x%llx end=0x%llx\n",
2935                       (long long)start_unnest, (long long)end_unnest,
2936                       (long long)entry->vme_start, (long long)entry->vme_end);
2937         }
2938
2939         if (start_unnest > entry->vme_start) {
2940                 _vm_map_clip_start(&map->hdr,
2941                                    entry,
2942                                    start_unnest);
2943                 vm_map_store_update_first_free(map, map->first_free);
2944         }
2945         if (entry->vme_end > end_unnest) {
2946                 _vm_map_clip_end(&map->hdr,
2947                                  entry,
2948                                  end_unnest);
2949                 vm_map_store_update_first_free(map, map->first_free);
2950         }
2951
2952         pmap_unnest(map->pmap,
2953                     entry->vme_start,
2954                     entry->vme_end - entry->vme_start);
2955         if ((map->mapped) && (map->ref_count)) {
2956                 /* clean up parent map/maps */
2957                 vm_map_submap_pmap_clean(
2958                         map, entry->vme_start,
2959                         entry->vme_end,
2960                         entry->object.sub_map,
2961                         entry->offset);
2962         }
2963         entry->use_pmap = FALSE;
2964 }
2965 #endif  /* NO_NESTED_PMAP */
2966
2967 /*
2968  *      vm_map_clip_start:      [ internal use only ]
2969  *
2970  *      Asserts that the given entry begins at or after
2971  *      the specified address; if necessary,
2972  *      it splits the entry into two.
2973  */
2974 static void
2975 vm_map_clip_start(
2976         vm_map_t        map,
2977         vm_map_entry_t  entry,
2978         vm_map_offset_t startaddr)
2979 {
2980 #ifndef NO_NESTED_PMAP
2981         if (entry->use_pmap &&
2982             startaddr >= entry->vme_start) {
2983                 vm_map_offset_t start_unnest, end_unnest;
2984
2985                 /*
2986                  * Make sure "startaddr" is no longer in a nested range
2987                  * before we clip.  Unnest only the minimum range the platform
2988                  * can handle.
2989                  * vm_map_clip_unnest may perform additional adjustments to
2990                  * the unnest range.
2991                  */
2992                 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
2993                 end_unnest = start_unnest + pmap_nesting_size_min;
2994                 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
2995         }
2996 #endif /* NO_NESTED_PMAP */
2997         if (startaddr > entry->vme_start) {
2998                 if (entry->object.vm_object &&
2999                     !entry->is_sub_map &&
3000                     entry->object.vm_object->phys_contiguous) {
3001                         pmap_remove(map->pmap,
3002                                     (addr64_t)(entry->vme_start),
3003                                     (addr64_t)(entry->vme_end));
3004                 }
3005                 _vm_map_clip_start(&map->hdr, entry, startaddr);
3006                 vm_map_store_update_first_free(map, map->first_free);
3007         }
3008 }
3009
3010
/*
 * Variant of vm_map_clip_start for a map copy: splits "entry" in the
 * copy's own header (cpy_hdr) at "startaddr", but only when that
 * address lies past the start of the entry.  Arguments may be
 * evaluated more than once.
 */
#define vm_map_copy_clip_start(copy, entry, startaddr)			\
	MACRO_BEGIN							\
	if ((entry)->vme_start < (startaddr)) {				\
		_vm_map_clip_start(&(copy)->cpy_hdr, (entry), (startaddr)); \
	}								\
	MACRO_END
3016
3017 /*
3018  *      This routine is called only when it is known that
3019  *      the entry must be split.
3020  */
3021 static void
3022 _vm_map_clip_start(
3023         register struct vm_map_header   *map_header,
3024         register vm_map_entry_t         entry,
3025         register vm_map_offset_t                start)
3026 {
3027         register vm_map_entry_t new_entry;
3028
3029         /*
3030          *      Split off the front portion --
3031          *      note that we must insert the new
3032          *      entry BEFORE this one, so that
3033          *      this entry has the specified starting
3034          *      address.
3035          */
3036
3037         new_entry = _vm_map_entry_create(map_header);
3038         vm_map_entry_copy_full(new_entry, entry);
3039
3040         new_entry->vme_end = start;
3041         entry->offset += (start - entry->vme_start);
3042         entry->vme_start = start;
3043
3044         _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
3045
3046         if (entry->is_sub_map)
3047                 vm_map_reference(new_entry->object.sub_map);
3048         else
3049                 vm_object_reference(new_entry->object.vm_object);
3050 }
3051
3052
3053 /*
3054  *      vm_map_clip_end:        [ internal use only ]
3055  *
3056  *      Asserts that the given entry ends at or before
3057  *      the specified address; if necessary,
3058  *      it splits the entry into two.
3059  */
3060 static void
3061 vm_map_clip_end(
3062         vm_map_t        map,
3063         vm_map_entry_t  entry,
3064         vm_map_offset_t endaddr)
3065 {
3066         if (endaddr > entry->vme_end) {
3067                 /*
3068                  * Within the scope of this clipping, limit "endaddr" to
3069                  * the end of this map entry...
3070                  */
3071                 endaddr = entry->vme_end;
3072         }
3073 #ifndef NO_NESTED_PMAP
3074         if (entry->use_pmap) {
3075                 vm_map_offset_t start_unnest, end_unnest;
3076
3077                 /*
3078                  * Make sure the range between the start of this entry and
3079                  * the new "endaddr" is no longer nested before we clip.
3080                  * Unnest only the minimum range the platform can handle.
3081                  * vm_map_clip_unnest may perform additional adjustments to
3082                  * the unnest range.
3083                  */
3084                 start_unnest = entry->vme_start;
3085                 end_unnest =
3086                         (endaddr + pmap_nesting_size_min - 1) &
3087                         ~(pmap_nesting_size_min - 1);
3088                 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
3089         }
3090 #endif /* NO_NESTED_PMAP */
3091         if (endaddr < entry->vme_end) {
3092                 if (entry->object.vm_object &&
3093                     !entry->is_sub_map &&
3094                     entry->object.vm_object->phys_contiguous) {
3095                         pmap_remove(map->pmap,
3096                                     (addr64_t)(entry->vme_start),
3097                                     (addr64_t)(entry->vme_end));
3098                 }
3099                 _vm_map_clip_end(&map->hdr, entry, endaddr);
3100                 vm_map_store_update_first_free(map, map->first_free);
3101         }
3102 }
3103
3104
/*
 * Variant of vm_map_clip_end for a map copy: splits "entry" in the
 * copy's own header (cpy_hdr) at "endaddr", but only when that
 * address lies before the end of the entry.  Arguments may be
 * evaluated more than once.
 */
#define vm_map_copy_clip_end(copy, entry, endaddr)			\
	MACRO_BEGIN							\
	if ((entry)->vme_end > (endaddr)) {				\
		_vm_map_clip_end(&(copy)->cpy_hdr, (entry), (endaddr));	\
	}								\
	MACRO_END
3110
3111 /*
3112  *      This routine is called only when it is known that
3113  *      the entry must be split.
3114  */
3115 static void
3116 _vm_map_clip_end(
3117         register struct vm_map_header   *map_header,
3118         register vm_map_entry_t         entry,
3119         register vm_map_offset_t        end)
3120 {
3121         register vm_map_entry_t new_entry;
3122
3123         /*
3124          *      Create a new entry and insert it
3125          *      AFTER the specified entry
3126          */
3127
3128         new_entry = _vm_map_entry_create(map_header);
3129         vm_map_entry_copy_full(new_entry, entry);
3130
3131         new_entry->vme_start = entry->vme_end = end;
3132         new_entry->offset += (end - entry->vme_start);
3133
3134         _vm_map_store_entry_link(map_header, entry, new_entry);
3135
3136         if (entry->is_sub_map)
3137                 vm_map_reference(new_entry->object.sub_map);
3138         else
3139                 vm_object_reference(new_entry->object.vm_object);
3140 }
3141
3142
/*
 *	VM_MAP_RANGE_CHECK:	[ internal use only ]
 *
 *	Clamps "start" and "end" in place to the valid range of the
 *	map: "start" is raised to vm_map_min(map) if below it, "end"
 *	is lowered to vm_map_max(map) if above it, and a "start" that
 *	still lies past "end" is pulled back to "end".  "start" and
 *	"end" must be modifiable lvalues; all arguments are evaluated
 *	more than once.
 */
#define VM_MAP_RANGE_CHECK(map, start, end)	\
	MACRO_BEGIN				\
	if ((start) < vm_map_min(map)) {	\
		(start) = vm_map_min(map);	\
	}					\
	if (vm_map_max(map) < (end)) {		\
		(end) = vm_map_max(map);	\
	}					\
	if ((end) < (start)) {			\
		(start) = (end);		\
	}					\
	MACRO_END
3158
3159 /*
3160  *      vm_map_range_check:     [ internal use only ]
3161  *
3162  *      Check that the region defined by the specified start and
3163  *      end addresses are wholly contained within a single map
3164  *      entry or set of adjacent map entries of the spacified map,
3165  *      i.e. the specified region contains no unmapped space.
3166  *      If any or all of the region is unmapped, FALSE is returned.
3167  *      Otherwise, TRUE is returned and if the output argument 'entry'
3168  *      is not NULL it points to the map entry containing the start
3169  *      of the region.
3170  *
3171  *      The map is locked for reading on entry and is left locked.
3172  */
3173 static boolean_t
3174 vm_map_range_check(
3175         register vm_map_t       map,
3176         register vm_map_offset_t        start,
3177         register vm_map_offset_t        end,
3178         vm_map_entry_t          *entry)
3179 {
3180         vm_map_entry_t          cur;
3181         register vm_map_offset_t        prev;
3182
3183         /*
3184          *      Basic sanity checks first
3185          */
3186         if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
3187                 return (FALSE);
3188
3189         /*
3190          *      Check first if the region starts within a valid
3191          *      mapping for the map.
3192          */
3193         if (!vm_map_lookup_entry(map, start, &cur))
3194                 return (FALSE);
3195
3196         /*
3197          *      Optimize for the case that the region is contained
3198          *      in a single map entry.
3199          */
3200         if (entry != (vm_map_entry_t *) NULL)
3201                 *entry = cur;
3202         if (end <= cur->vme_end)
3203                 return (TRUE);
3204
3205         /*
3206          *      If the region is not wholly contained within a
3207          *      single entry, walk the entries looking for holes.
3208          */
3209         prev = cur->vme_end;
3210         cur = cur->vme_next;
3211         while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
3212                 if (end <= cur->vme_end)
3213                         return (TRUE);
3214                 prev = cur->vme_end;
3215                 cur = cur->vme_next;
3216         }
3217         return (FALSE);
3218 }
3219
3220 /*
3221  *      vm_map_submap:          [ kernel use only ]
3222  *
3223  *      Mark the given range as handled by a subordinate map.
3224  *
3225  *      This range must have been created with vm_map_find using
3226  *      the vm_submap_object, and no other operations may have been
3227  *      performed on this range prior to calling vm_map_submap.
3228  *
3229  *      Only a limited number of operations can be performed
3230  *      within this rage after calling vm_map_submap:
3231  *              vm_fault
3232  *      [Don't try vm_map_copyin!]
3233  *
3234  *      To remove a submapping, one must first remove the
3235  *      range from the superior map, and then destroy the
3236  *      submap (if desired).  [Better yet, don't try it.]
3237  */
3238 kern_return_t
3239 vm_map_submap(
3240         vm_map_t                map,
3241         vm_map_offset_t start,
3242         vm_map_offset_t end,
3243         vm_map_t                submap,
3244         vm_map_offset_t offset,
3245 #ifdef NO_NESTED_PMAP
3246         __unused
3247 #endif  /* NO_NESTED_PMAP */
3248         boolean_t               use_pmap)
3249 {
3250         vm_map_entry_t          entry;
3251         register kern_return_t  result = KERN_INVALID_ARGUMENT;
3252         register vm_object_t    object;
3253
3254         vm_map_lock(map);
3255
3256         if (! vm_map_lookup_entry(map, start, &entry)) {
3257                 entry = entry->vme_next;
3258         }
3259
3260         if (entry == vm_map_to_entry(map) ||
3261             entry->is_sub_map) {
3262                 vm_map_unlock(map);
3263                 return KERN_INVALID_ARGUMENT;
3264         }
3265
3266         assert(!entry->use_pmap); /* we don't want to unnest anything here */
3267         vm_map_clip_start(map, entry, start);
3268         vm_map_clip_end(map, entry, end);
3269
3270         if ((entry->vme_start == start) && (entry->vme_end == end) &&
3271             (!entry->is_sub_map) &&
3272             ((object = entry->object.vm_object) == vm_submap_object) &&
3273             (object->resident_page_count == 0) &&
3274             (object->copy == VM_OBJECT_NULL) &&
3275             (object->shadow == VM_OBJECT_NULL) &&
3276             (!object->pager_created)) {
3277                 entry->offset = (vm_object_offset_t)offset;
3278                 entry->object.vm_object = VM_OBJECT_NULL;
3279                 vm_object_deallocate(object);
3280                 entry->is_sub_map = TRUE;
3281                 entry->object.sub_map = submap;
3282                 vm_map_reference(submap);
3283                 submap->mapped = TRUE;
3284
3285 #ifndef NO_NESTED_PMAP
3286                 if (use_pmap) {
3287                         /* nest if platform code will allow */
3288                         if(submap->pmap == NULL) {
3289                                 submap->pmap = pmap_create((vm_map_size_t) 0, FALSE);
3290                                 if(submap->pmap == PMAP_NULL) {
3291                                         vm_map_unlock(map);
3292                                         return(KERN_NO_SPACE);
3293                                 }
3294                         }
3295                         result = pmap_nest(map->pmap,
3296                                            (entry->object.sub_map)->pmap,
3297                                            (addr64_t)start,
3298                                            (addr64_t)start,
3299                                            (uint64_t)(end - start));
3300                         if(result)
3301                                 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
3302                         entry->use_pmap = TRUE;
3303                 }
3304 #else   /* NO_NESTED_PMAP */
3305                 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
3306 #endif  /* NO_NESTED_PMAP */
3307                 result = KERN_SUCCESS;
3308         }
3309         vm_map_unlock(map);
3310
3311         return(result);
3312 }
3313
3314 /*
3315  *      vm_map_protect:
3316  *
3317  *      Sets the protection of the specified address
3318  *      region in the target map.  If "set_max" is
3319  *      specified, the maximum protection is to be set;
3320  *      otherwise, only the current protection is affected.
3321  */
3322 kern_return_t
3323 vm_map_protect(
3324         register vm_map_t       map,
3325         register vm_map_offset_t        start,
3326         register vm_map_offset_t        end,
3327         register vm_prot_t      new_prot,
3328         register boolean_t      set_max)
3329 {
3330         register vm_map_entry_t         current;
3331         register vm_map_offset_t        prev;
3332         vm_map_entry_t                  entry;
3333         vm_prot_t                       new_max;
3334
3335         XPR(XPR_VM_MAP,
3336             "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
3337             map, start, end, new_prot, set_max);
3338
3339         vm_map_lock(map);
3340
3341         /* LP64todo - remove this check when vm_map_commpage64()
3342          * no longer has to stuff in a map_entry for the commpage
3343          * above the map's max_offset.
3344          */
3345         if (start >= map->max_offset) {
3346                 vm_map_unlock(map);
3347                 return(KERN_INVALID_ADDRESS);
3348         }
3349
3350         while(1) {
3351                 /*
3352                  *      Lookup the entry.  If it doesn't start in a valid
3353                  *      entry, return an error.
3354                  */
3355                 if (! vm_map_lookup_entry(map, start, &entry)) {
3356                         vm_map_unlock(map);
3357                         return(KERN_INVALID_ADDRESS);
3358                 }
3359
3360                 if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
3361                         start = SUPERPAGE_ROUND_DOWN(start);
3362                         continue;
3363                 }
3364                 break;
3365         }
3366         if (entry->superpage_size)
3367                 end = SUPERPAGE_ROUND_UP(end);
3368
3369         /*
3370          *      Make a first pass to check for protection and address
3371          *      violations.
3372          */
3373
3374         current = entry;
3375         prev = current->vme_start;
3376         while ((current != vm_map_to_entry(map)) &&
3377                (current->vme_start < end)) {
3378
3379                 /*
3380                  * If there is a hole, return an error.
3381                  */
3382                 if (current->vme_start != prev) {
3383                         vm_map_unlock(map);
3384                         return(KERN_INVALID_ADDRESS);
3385                 }
3386
3387                 new_max = current->max_protection;
3388                 if(new_prot & VM_PROT_COPY) {
3389                         new_max |= VM_PROT_WRITE;
3390                         if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
3391                                 vm_map_unlock(map);
3392                                 return(KERN_PROTECTION_FAILURE);
3393                         }
3394                 } else {
3395                         if ((new_prot & new_max) != new_prot) {
3396                                 vm_map_unlock(map);
3397                                 return(KERN_PROTECTION_FAILURE);
3398                         }
3399                 }
3400
3401 #if CONFIG_EMBEDDED
3402                 if (new_prot & VM_PROT_WRITE) {
3403                         if ((new_prot & VM_PROT_EXECUTE) && !(current->used_for_jit)) {
3404                                 printf("EMBEDDED: %s can't have both write and exec at the same time\n", __FUNCTION__);
3405                                 new_prot &= ~VM_PROT_EXECUTE;
3406                         }
3407                 }
3408 #endif
3409
3410                 prev = current->vme_end;
3411                 current = current->vme_next;
3412         }
3413         if (end > prev) {
3414                 vm_map_unlock(map);
3415                 return(KERN_INVALID_ADDRESS);
3416         }
3417
3418         /*
3419          *      Go back and fix up protections.
3420          *      Clip to start here if the range starts within
3421          *      the entry.
3422          */
3423
3424         current = entry;
3425         if (current != vm_map_to_entry(map)) {
3426                 /* clip and unnest if necessary */
3427                 vm_map_clip_start(map, current, start);
3428         }
3429
3430         while ((current != vm_map_to_entry(map)) &&
3431                (current->vme_start < end)) {
3432
3433                 vm_prot_t       old_prot;
3434
3435                 vm_map_clip_end(map, current, end);
3436
3437                 assert(!current->use_pmap); /* clipping did unnest if needed */
3438
3439                 old_prot = current->protection;
3440
3441                 if(new_prot & VM_PROT_COPY) {
3442                         /* caller is asking specifically to copy the      */
3443                         /* mapped data, this implies that max protection  */
3444                         /* will include write.  Caller must be prepared   */
3445                         /* for loss of shared memory communication in the */
3446                         /* target area after taking this step */
3447
3448                         if (current->is_sub_map == FALSE && current->object.vm_object == VM_OBJECT_NULL){
3449                                 current->object.vm_object = vm_object_allocate((vm_map_size_t)(current->vme_end - current->vme_start));
3450                                 current->offset = 0;
3451                         }
3452                         current->needs_copy = TRUE;
3453                         current->max_protection |= VM_PROT_WRITE;
3454                 }
3455
3456                 if (set_max)
3457                         current->protection =
3458                                 (current->max_protection =
3459                                  new_prot & ~VM_PROT_COPY) &
3460                                 old_prot;
3461                 else
3462                         current->protection = new_prot & ~VM_PROT_COPY;
3463
3464                 /*
3465                  *      Update physical map if necessary.
3466                  *      If the request is to turn off write protection,
3467                  *      we won't do it for real (in pmap). This is because
3468                  *      it would cause copy-on-write to fail.  We've already
3469                  *      set, the new protection in the map, so if a
3470                  *      write-protect fault occurred, it will be fixed up
3471                  *      properly, COW or not.
3472                  */
3473                 if (current->protection != old_prot) {
3474                         /* Look one level in we support nested pmaps */
3475                         /* from mapped submaps which are direct entries */
3476                         /* in our map */
3477
3478                         vm_prot_t prot;
3479
3480                         prot = current->protection & ~VM_PROT_WRITE;
3481
3482                         if (override_nx(map, current->alias) && prot)
3483                                 prot |= VM_PROT_EXECUTE;
3484
3485                         if (current->is_sub_map && current->use_pmap) {
3486                                 pmap_protect(current->object.sub_map->pmap,
3487                                              current->vme_start,
3488                                              current->vme_end,
3489                                              prot);
3490                         } else {
3491                                 pmap_protect(map->pmap,
3492                                              current->vme_start,
3493                                              current->vme_end,
3494                                              prot);
3495                         }
3496                 }
3497                 current = current->vme_next;
3498         }
3499
3500         current = entry;
3501         while ((current != vm_map_to_entry(map)) &&
3502                (current->vme_start <= end)) {
3503                 vm_map_simplify_entry(map, current);
3504                 current = current->vme_next;
3505         }
3506
3507         vm_map_unlock(map);
3508         return(KERN_SUCCESS);
3509 }
3510
3511 /*
3512  *      vm_map_inherit:
3513  *
3514  *      Sets the inheritance of the specified address
3515  *      range in the target map.  Inheritance
3516  *      affects how the map will be shared with
3517  *      child maps at the time of vm_map_fork.
3518  */
3519 kern_return_t
3520 vm_map_inherit(
3521         register vm_map_t       map,
3522         register vm_map_offset_t        start,
3523         register vm_map_offset_t        end,
3524         register vm_inherit_t   new_inheritance)
3525 {
3526         register vm_map_entry_t entry;
3527         vm_map_entry_t  temp_entry;
3528
3529         vm_map_lock(map);
3530
3531         VM_MAP_RANGE_CHECK(map, start, end);
3532
3533         if (vm_map_lookup_entry(map, start, &temp_entry)) {
3534                 entry = temp_entry;
3535         }
3536         else {
3537                 temp_entry = temp_entry->vme_next;
3538                 entry = temp_entry;
3539         }
3540
3541         /* first check entire range for submaps which can't support the */
3542         /* given inheritance. */
3543         while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3544                 if(entry->is_sub_map) {
3545                         if(new_inheritance == VM_INHERIT_COPY) {
3546                                 vm_map_unlock(map);
3547                                 return(KERN_INVALID_ARGUMENT);
3548                         }
3549                 }
3550
3551                 entry = entry->vme_next;
3552         }
3553
3554         entry = temp_entry;
3555         if (entry != vm_map_to_entry(map)) {
3556                 /* clip and unnest if necessary */
3557                 vm_map_clip_start(map, entry, start);
3558         }
3559
3560         while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3561                 vm_map_clip_end(map, entry, end);
3562                 assert(!entry->use_pmap); /* clip did unnest if needed */
3563
3564                 entry->inheritance = new_inheritance;
3565
3566                 entry = entry->vme_next;
3567         }
3568
3569         vm_map_unlock(map);
3570         return(KERN_SUCCESS);
3571 }
3572
3573 /*
3574  * Update the accounting for the amount of wired memory in this map.  If the user has
3575  * exceeded the defined limits, then we fail.  Wiring on behalf of the kernel never fails.
3576  */
3577
3578 static kern_return_t
3579 add_wire_counts(
3580         vm_map_t        map,
3581         vm_map_entry_t  entry,
3582         boolean_t       user_wire)
3583 {
3584         vm_map_size_t   size;
3585
3586         if (user_wire) {
3587                 unsigned int total_wire_count =  vm_page_wire_count + vm_lopage_free_count;
3588
3589                 /*
3590                  * We're wiring memory at the request of the user.  Check if this is the first time the user is wiring
3591                  * this map entry.
3592                  */
3593
3594                 if (entry->user_wired_count == 0) {
3595                         size = entry->vme_end - entry->vme_start;
3596
3597                         /*
3598                          * Since this is the first time the user is wiring this map entry, check to see if we're
3599                          * exceeding the user wire limits.  There is a per map limit which is the smaller of either
3600                          * the process's rlimit or the global vm_user_wire_limit which caps this value.  There is also
3601                          * a system-wide limit on the amount of memory all users can wire.  If the user is over either
3602                          * limit, then we fail.
3603                          */
3604
3605                         if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
3606                            size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
3607                            size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
3608                                 return KERN_RESOURCE_SHORTAGE;
3609
3610                         /*
3611                          * The first time the user wires an entry, we also increment the wired_count and add this to
3612                          * the total that has been wired in the map.
3613                          */
3614
3615                         if (entry->wired_count >= MAX_WIRE_COUNT)
3616                                 return KERN_FAILURE;
3617
3618                         entry->wired_count++;
3619                         map->user_wire_size += size;
3620                 }
3621
3622                 if (entry->user_wired_count >= MAX_WIRE_COUNT)
3623                         return KERN_FAILURE;
3624
3625                 entry->user_wired_count++;
3626
3627         } else {
3628
3629                 /*
3630                  * The kernel's wiring the memory.  Just bump the count and continue.
3631                  */
3632
3633                 if (entry->wired_count >= MAX_WIRE_COUNT)
3634                         panic("vm_map_wire: too many wirings");
3635
3636                 entry->wired_count++;
3637         }
3638
3639         return KERN_SUCCESS;
3640 }
3641
3642 /*
3643  * Update the memory wiring accounting now that the given map entry is being unwired.
3644  */
3645
3646 static void
3647 subtract_wire_counts(
3648         vm_map_t        map,
3649         vm_map_entry_t  entry,
3650         boolean_t       user_wire)
3651 {
3652
3653         if (user_wire) {
3654
3655                 /*
3656                  * We're unwiring memory at the request of the user.  See if we're removing the last user wire reference.
3657                  */
3658
3659                 if (entry->user_wired_count == 1) {
3660
3661                         /*
3662                          * We're removing the last user wire reference.  Decrement the wired_count and the total
3663                          * user wired memory for this map.
3664                          */
3665
3666                         assert(entry->wired_count >= 1);
3667                         entry->wired_count--;
3668                         map->user_wire_size -= entry->vme_end - entry->vme_start;
3669                 }
3670
3671                 assert(entry->user_wired_count >= 1);
3672                 entry->user_wired_count--;
3673
3674         } else {
3675
3676                 /*
3677                  * The kernel is unwiring the memory.   Just update the count.
3678                  */
3679
3680                 assert(entry->wired_count >= 1);
3681                 entry->wired_count--;
3682         }
3683 }
3684
3685 /*
3686  *      vm_map_wire:
3687  *
3688  *      Sets the pageability of the specified address range in the
3689  *      target map as wired.  Regions specified as not pageable require
3690  *      locked-down physical memory and physical page maps.  The
3691  *      access_type variable indicates types of accesses that must not
3692  *      generate page faults.  This is checked against protection of
3693  *      memory being locked-down.
3694  *
3695  *      The map must not be locked, but a reference must remain to the
3696  *      map throughout the call.
      *
      *      vm_map_wire_nested() does the work for vm_map_wire() and calls
      *      itself for entries that are submaps.
      *
      *      Parameters:
      *        map          map whose entries are wired; locked here with
      *                     vm_map_lock()
      *        start, end   range to wire; both are asserted page aligned,
      *                     and start == end succeeds without wiring anything
      *        access_type  access that must be allowed by the protection of
      *                     each entry that is wired for the first time here
      *        user_wire    passed to add_wire_counts()/subtract_wire_counts();
      *                     when TRUE, the wait on an in-transition entry is
      *                     THREAD_ABORTSAFE instead of THREAD_UNINT
      *        map_pmap     NULL on the call from vm_map_wire(); when not NULL
      *                     it is handed to vm_fault_wire() together with
      *                     pmap_addr in place of map->pmap and the entry's
      *                     start address
      *        pmap_addr    see map_pmap
      *
      *      Returns KERN_SUCCESS when the loop over the entries ends without
      *      an error.  Otherwise: KERN_INVALID_ADDRESS (start not in the
      *      map, or a hole follows an entry about to be wired),
      *      KERN_PROTECTION_FAILURE (access_type not permitted by the
      *      entry), KERN_FAILURE (interrupted wait, failed re-lookup, or
      *      vm_map_lookup_locked() failure), or the error returned by
      *      add_wire_counts(), vm_fault_wire() or the nested call.  On
      *      failure vm_map_unwire() is called on the part of the range from
      *      "start" up to the current position "s".
3697  */
3698 static kern_return_t
3699 vm_map_wire_nested(
3700         register vm_map_t       map,
3701         register vm_map_offset_t        start,
3702         register vm_map_offset_t        end,
3703         register vm_prot_t      access_type,
3704         boolean_t               user_wire,
3705         pmap_t                  map_pmap,
3706         vm_map_offset_t         pmap_addr)
3707 {
3708         register vm_map_entry_t entry;
3709         struct vm_map_entry     *first_entry, tmp_entry;
3710         vm_map_t                real_map;
3711         register vm_map_offset_t        s,e;
3712         kern_return_t           rc;
3713         boolean_t               need_wakeup;
3714         boolean_t               main_map = FALSE; /* NOTE(review): set below but never read in this function */
3715         wait_interrupt_t        interruptible_state;
3716         thread_t                cur_thread;
3717         unsigned int            last_timestamp;
3718         vm_map_size_t           size;
3719
3720         vm_map_lock(map);
3721         if(map_pmap == NULL)
3722                 main_map = TRUE;
             /*
              * Remember the map timestamp; it is compared after the map is
              * re-locked further down to decide whether "entry" has to be
              * looked up again.
              */
3723         last_timestamp = map->timestamp;
3724
3725         VM_MAP_RANGE_CHECK(map, start, end);
3726         assert(page_aligned(start));
3727         assert(page_aligned(end));
3728         if (start == end) {
3729                 /* We wired what the caller asked for, zero pages */
3730                 vm_map_unlock(map);
3731                 return KERN_SUCCESS;
3732         }
3733
3734         need_wakeup = FALSE;
3735         cur_thread = current_thread();
3736
3737         s = start;
3738         rc = KERN_SUCCESS;
3739
3740         if (vm_map_lookup_entry(map, s, &first_entry)) {
3741                 entry = first_entry;
3742                 /*
3743                  * vm_map_clip_start will be done later.
3744                  * We don't want to unnest any nested submaps here !
3745                  */
3746         } else {
3747                 /* Start address is not in map */
3748                 rc = KERN_INVALID_ADDRESS;
3749                 goto done;
3750         }
3751
3752         while ((entry != vm_map_to_entry(map)) && (s < end)) {
3753                 /*
3754                  * At this point, we have wired from "start" to "s".
3755                  * We still need to wire from "s" to "end".
3756                  *
3757                  * "entry" hasn't been clipped, so it could start before "s"
3758                  * and/or end after "end".
3759                  */
3760
3761                 /* "e" is how far we want to wire in this entry */
3762                 e = entry->vme_end;
3763                 if (e > end)
3764                         e = end;
3765
3766                 /*
3767                  * If another thread is wiring/unwiring this entry then
3768                  * block after informing other thread to wake us up.
3769                  */
3770                 if (entry->in_transition) {
3771                         wait_result_t wait_result;
3772
3773                         /*
3774                          * We have not clipped the entry.  Make sure that
3775                          * the start address is in range so that the lookup
3776                          * below will succeed.
3777                          * "s" is the current starting point: we've already
3778                          * wired from "start" to "s" and we still have
3779                          * to wire from "s" to "end".
3780                          */
3781
3782                         entry->needs_wakeup = TRUE;
3783
3784                         /*
3785                          * wake up anybody waiting on entries that we have
3786                          * already wired.
3787                          */
3788                         if (need_wakeup) {
3789                                 vm_map_entry_wakeup(map);
3790                                 need_wakeup = FALSE;
3791                         }
3792                         /*
3793                          * User wiring is interruptible
3794                          */
3795                         wait_result = vm_map_entry_wait(map,
3796                                                         (user_wire) ? THREAD_ABORTSAFE :
3797                                                         THREAD_UNINT);
3798                         if (user_wire && wait_result == THREAD_INTERRUPTED) {
3799                                 /*
3800                                  * undo the wirings we have done so far
3801                                  * We do not clear the needs_wakeup flag,
3802                                  * because we cannot tell if we were the
3803                                  * only one waiting.
3804                                  */
3805                                 rc = KERN_FAILURE;
3806                                 goto done;
3807                         }
3808
3809                         /*
3810                          * Cannot avoid a lookup here. reset timestamp.
3811                          */
3812                         last_timestamp = map->timestamp;
3813
3814                         /*
3815                          * The entry could have been clipped, look it up again.
3816                          * Worst that can happen is, it may not exist anymore.
3817                          */
3818                         if (!vm_map_lookup_entry(map, s, &first_entry)) {
3819                                 /*
3820                                  * User: undo everything up to the previous
3821                                  * entry.  let vm_map_unwire worry about
3822                                  * checking the validity of the range.
3823                                  */
3824                                 rc = KERN_FAILURE;
3825                                 goto done;
3826                         }
3827                         entry = first_entry;
3828                         continue;
3829                 }
3830
3831                 if (entry->is_sub_map) {
3832                         vm_map_offset_t sub_start;
3833                         vm_map_offset_t sub_end;
3834                         vm_map_offset_t local_start;
3835                         vm_map_offset_t local_end;
3836                         pmap_t          pmap;
3837
3838                         vm_map_clip_start(map, entry, s);
3839                         vm_map_clip_end(map, entry, end);
3840
                             /*
                              * sub_start/sub_end run from entry->offset to
                              * entry->offset plus the clipped entry's length;
                              * they are passed below as the range to wire in
                              * the submap.
                              */
3841                         sub_start = entry->offset;
3842                         sub_end = entry->vme_end;
3843                         sub_end += entry->offset - entry->vme_start;
3844
3845                         local_end = entry->vme_end; /* NOTE(review): never read afterwards */
3846                         if(map_pmap == NULL) {
3847                                 vm_object_t             object;
3848                                 vm_object_offset_t      offset;
3849                                 vm_prot_t               prot;
3850                                 boolean_t               wired;
3851                                 vm_map_entry_t          local_entry;
3852                                 vm_map_version_t         version;
3853                                 vm_map_t                lookup_map;
3854
3855                                 if(entry->use_pmap) {
3856                                         pmap = entry->object.sub_map->pmap;
3857                                         /* ppc implementation requires that */
3858                                         /* submaps pmap address ranges line */
3859                                         /* up with parent map */
3860 #ifdef notdef
3861                                         pmap_addr = sub_start;
3862 #endif
3863                                         pmap_addr = s;
3864                                 } else {
3865                                         pmap = map->pmap;
3866                                         pmap_addr = s;
3867                                 }
3868
3869                                 if (entry->wired_count) {
3870                                         if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
3871                                                 goto done;
3872
3873                                         /*
3874                                          * The map was not unlocked:
3875                                          * no need to goto re-lookup.
3876                                          * Just go directly to next entry.
3877                                          */
3878                                         entry = entry->vme_next;
3879                                         s = entry->vme_start;
3880                                         continue;
3881
3882                                 }
3883
3884                                 /* call vm_map_lookup_locked to */
3885                                 /* cause any needs copy to be   */
3886                                 /* evaluated */
3887                                 local_start = entry->vme_start;
3888                                 lookup_map = map;
3889                                 vm_map_lock_write_to_read(map);
3890                                 if(vm_map_lookup_locked(
3891                                            &lookup_map, local_start,
3892                                            access_type,
3893                                            OBJECT_LOCK_EXCLUSIVE,
3894                                            &version, &object,
3895                                            &offset, &prot, &wired,
3896                                            NULL,
3897                                            &real_map)) {
3898
                                             /*
                                              * NOTE(review): this failure path
                                              * returns directly instead of going
                                              * through "done", so the
                                              * vm_map_entry_wakeup() issued there
                                              * when need_wakeup is set is skipped.
                                              */
3899                                         vm_map_unlock_read(lookup_map);
3900                                         vm_map_unwire(map, start,
3901                                                       s, user_wire);
3902                                         return(KERN_FAILURE);
3903                                 }
3904                                 if(real_map != lookup_map)
3905                                         vm_map_unlock(real_map);
3906                                 vm_map_unlock_read(lookup_map);
3907                                 vm_map_lock(map);
3908                                 vm_object_unlock(object);
3909
3910                                 /* we unlocked, so must re-lookup */
3911                                 if (!vm_map_lookup_entry(map,
3912                                                          local_start,
3913                                                          &local_entry)) {
3914                                         rc = KERN_FAILURE;
3915                                         goto done;
3916                                 }
3917
3918                                 /*
3919                                  * entry could have been "simplified",
3920                                  * so re-clip
3921                                  */
3922                                 entry = local_entry;
3923                                 assert(s == local_start);
3924                                 vm_map_clip_start(map, entry, s);
3925                                 vm_map_clip_end(map, entry, end);
3926                                 /* re-compute "e" */
3927                                 e = entry->vme_end;
3928                                 if (e > end)
3929                                         e = end;
3930
3931                                 /* did we have a change of type? */
3932                                 if (!entry->is_sub_map) {
3933                                         last_timestamp = map->timestamp;
3934                                         continue;
3935                                 }
3936                         } else {
3937                                 local_start = entry->vme_start;
3938                                 pmap = map_pmap;
3939                         }
3940
3941                         if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
3942                                 goto done;
3943
                             /*
                              * Mark the entry in transition for the time the map
                              * is unlocked around the nested call; the flag is
                              * cleared again in the loop below.
                              */
3944                         entry->in_transition = TRUE;
3945
3946                         vm_map_unlock(map);
3947                         rc = vm_map_wire_nested(entry->object.sub_map,
3948                                                 sub_start, sub_end,
3949                                                 access_type,
3950                                                 user_wire, pmap, pmap_addr);
3951                         vm_map_lock(map);
3952
3953                         /*
3954                          * Find the entry again.  It could have been clipped
3955                          * after we unlocked the map.
3956                          */
3957                         if (!vm_map_lookup_entry(map, local_start,
3958                                                  &first_entry))
3959                                 panic("vm_map_wire: re-lookup failed");
3960                         entry = first_entry;
3961
3962                         assert(local_start == s);
3963                         /* re-compute "e" */
3964                         e = entry->vme_end;
3965                         if (e > end)
3966                                 e = end;
3967
3968                         last_timestamp = map->timestamp;
                             /*
                              * Clear the transition flag on every entry that
                              * starts below "e", note any waiter, and on failure
                              * give back the count taken by add_wire_counts().
                              */
3969                         while ((entry != vm_map_to_entry(map)) &&
3970                                (entry->vme_start < e)) {
3971                                 assert(entry->in_transition);
3972                                 entry->in_transition = FALSE;
3973                                 if (entry->needs_wakeup) {
3974                                         entry->needs_wakeup = FALSE;
3975                                         need_wakeup = TRUE;
3976                                 }
3977                                 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
3978                                         subtract_wire_counts(map, entry, user_wire);
3979                                 }
3980                                 entry = entry->vme_next;
3981                         }
3982                         if (rc != KERN_SUCCESS) {       /* from vm_*_wire */
3983                                 goto done;
3984                         }
3985
3986                         /* no need to relookup again */
3987                         s = entry->vme_start;
3988                         continue;
3989                 }
3990
3991                 /*
3992                  * If this entry is already wired then increment
3993                  * the appropriate wire reference count.
                      *
                      * NOTE(review): entries handled here "continue" before
                      * the hole and protection checks made further down for
                      * unwired entries, so those checks do not run for them.
3994                  */
3995                 if (entry->wired_count) {
3996                         /*
3997                          * entry is already wired down, get our reference
3998                          * after clipping to our range.
3999                          */
4000                         vm_map_clip_start(map, entry, s);
4001                         vm_map_clip_end(map, entry, end);
4002
4003                         if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4004                                 goto done;
4005
4006                         /* map was not unlocked: no need to relookup */
4007                         entry = entry->vme_next;
4008                         s = entry->vme_start;
4009                         continue;
4010                 }
4011
4012                 /*
4013                  * Unwired entry or wire request transmitted via submap
4014                  */
4015
4016
4017                 /*
4018                  * Perform actions of vm_map_lookup that need the write
4019                  * lock on the map: create a shadow object for a
4020                  * copy-on-write region, or an object for a zero-fill
4021                  * region.
4022                  */
4023                 size = entry->vme_end - entry->vme_start;
4024                 /*
4025                  * If wiring a copy-on-write page, we need to copy it now
4026                  * even if we're only (currently) requesting read access.
4027                  * This is aggressive, but once it's wired we can't move it.
4028                  */
4029                 if (entry->needs_copy) {
4030                         vm_object_shadow(&entry->object.vm_object,
4031                                          &entry->offset, size);
4032                         entry->needs_copy = FALSE;
4033                 } else if (entry->object.vm_object == VM_OBJECT_NULL) {
4034                         entry->object.vm_object = vm_object_allocate(size);
4035                         entry->offset = (vm_object_offset_t)0;
4036                 }
4037
4038                 vm_map_clip_start(map, entry, s);
4039                 vm_map_clip_end(map, entry, end);
4040
4041                 /* re-compute "e" */
4042                 e = entry->vme_end;
4043                 if (e > end)
4044                         e = end;
4045
4046                 /*
4047                  * Check for holes and protection mismatch.
4048                  * Holes: Next entry should be contiguous unless this
4049                  *        is the end of the region.
4050                  * Protection: Access requested must be allowed, unless
4051                  *      wiring is by protection class
4052                  */
4053                 if ((entry->vme_end < end) &&
4054                     ((entry->vme_next == vm_map_to_entry(map)) ||
4055                      (entry->vme_next->vme_start > entry->vme_end))) {
4056                         /* found a hole */
4057                         rc = KERN_INVALID_ADDRESS;
4058                         goto done;
4059                 }
4060                 if ((entry->protection & access_type) != access_type) {
4061                         /* found a protection problem */
4062                         rc = KERN_PROTECTION_FAILURE;
4063                         goto done;
4064                 }
4065
4066                 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
4067
4068                 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4069                         goto done;
4070
4071                 entry->in_transition = TRUE;
4072
4073                 /*
4074                  * This entry might get split once we unlock the map.
4075                  * In vm_fault_wire(), we need the current range as
4076                  * defined by this entry.  In order for this to work
4077                  * along with a simultaneous clip operation, we make a
4078                  * temporary copy of this entry and use that for the
4079                  * wiring.  Note that the underlying objects do not
4080                  * change during a clip.
4081                  */
4082                 tmp_entry = *entry;
4083
4084                 /*
4085                  * The in_transition state guarantees that the entry
4086                  * (or entries for this range, if split occurred) will be
4087                  * there when the map lock is acquired for the second time.
4088                  */
4089                 vm_map_unlock(map);
4090
                     /*
                      * Kernel wiring on a non-null thread: call
                      * thread_interrupt_level(THREAD_UNINT) before
                      * vm_fault_wire() and hand its return value back to
                      * thread_interrupt_level() afterwards.  The else arm only
                      * gives interruptible_state a value; it is not read in
                      * that case.
                      */
4091                 if (!user_wire && cur_thread != THREAD_NULL)
4092                         interruptible_state = thread_interrupt_level(THREAD_UNINT);
4093                 else
4094                         interruptible_state = THREAD_UNINT;
4095
4096                 if(map_pmap)
4097                         rc = vm_fault_wire(map,
4098                                            &tmp_entry, map_pmap, pmap_addr);
4099                 else
4100                         rc = vm_fault_wire(map,
4101                                            &tmp_entry, map->pmap,
4102                                            tmp_entry.vme_start);
4103
4104                 if (!user_wire && cur_thread != THREAD_NULL)
4105                         thread_interrupt_level(interruptible_state);
4106
4107                 vm_map_lock(map);
4108
                     /*
                      * NOTE(review): presumably map->timestamp moves by exactly
                      * one when nothing else touched the map while it was
                      * unlocked; confirm against the map lock/unlock code.
                      */
4109                 if (last_timestamp+1 != map->timestamp) {
4110                         /*
4111                          * Find the entry again.  It could have been clipped
4112                          * after we unlocked the map.
4113                          */
4114                         if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4115                                                  &first_entry))
4116                                 panic("vm_map_wire: re-lookup failed");
4117
4118                         entry = first_entry;
4119                 }
4120
4121                 last_timestamp = map->timestamp;
4122
                     /*
                      * Clear the transition flag on every entry that starts
                      * inside the range saved in tmp_entry, note any waiter,
                      * and on failure give back the count taken above.
                      */
4123                 while ((entry != vm_map_to_entry(map)) &&
4124                        (entry->vme_start < tmp_entry.vme_end)) {
4125                         assert(entry->in_transition);
4126                         entry->in_transition = FALSE;
4127                         if (entry->needs_wakeup) {
4128                                 entry->needs_wakeup = FALSE;
4129                                 need_wakeup = TRUE;
4130                         }
4131                         if (rc != KERN_SUCCESS) {       /* from vm_*_wire */
4132                                 subtract_wire_counts(map, entry, user_wire);
4133                         }
4134                         entry = entry->vme_next;
4135                 }
4136
4137                 if (rc != KERN_SUCCESS) {               /* from vm_*_wire */
4138                         goto done;
4139                 }
4140
4141                 s = entry->vme_start;
4142         } /* end while loop through map entries */
4143
4144 done:
4145         if (rc == KERN_SUCCESS) {
4146                 /* repair any damage we may have made to the VM map */
4147                 vm_map_simplify_range(map, start, end);
4148         }
4149
4150         vm_map_unlock(map);
4151
4152         /*
4153          * wake up anybody waiting on entries we wired.
4154          */
4155         if (need_wakeup)
4156                 vm_map_entry_wakeup(map);
4157
4158         if (rc != KERN_SUCCESS) {
4159                 /* undo what has been wired so far */
4160                 vm_map_unwire(map, start, s, user_wire);
4161         }
4162
4163         return rc;
4164
4165 }
4166
4167 /*
4168  *      vm_map_wire: wire the range start..end of "map" by calling
4169  *      vm_map_wire_nested() with a NULL pmap and a pmap address of 0;
4170  *      the status of that call is returned unchanged.
4171  */
4172 kern_return_t
4173 vm_map_wire(
4174         register vm_map_t       map,
4175         register vm_map_offset_t        start,
4176         register vm_map_offset_t        end,
4177         register vm_prot_t      access_type,
4178         boolean_t               user_wire)
4179 {
4180         return vm_map_wire_nested(map, start, end, access_type,
4181                                   user_wire, (pmap_t)NULL, 0); }
4182
4183 /*
4184  *      vm_map_unwire:
4185  *
4186  *      Sets the pageability of the specified address range in the target
4187  *      as pageable.  Regions specified must have been wired previously.
4188  *
4189  *      The map must not be locked, but a reference must remain to the map
4190  *      throughout the call.
4191  *
4192  *      Kernel will panic on failures.  User unwire ignores holes and
4193  *      unwired and intransition entries to avoid losing memory by leaving
4194  *      it unwired.
4195  */
4196 static kern_return_t
4197 vm_map_unwire_nested(
4198         register vm_map_t       map,
4199         register vm_map_offset_t        start,
4200         register vm_map_offset_t        end,
4201         boolean_t               user_wire,
4202         pmap_t                  map_pmap,
4203         vm_map_offset_t         pmap_addr)
4204 {
4205         register vm_map_entry_t entry;
4206         struct vm_map_entry     *first_entry, tmp_entry;
4207         boolean_t               need_wakeup;
4208         boolean_t               main_map = FALSE;
4209         unsigned int            last_timestamp;
4210
4211         vm_map_lock(map);
4212         if(map_pmap == NULL)
4213                 main_map = TRUE;
4214         last_timestamp = map->timestamp;
4215
4216         VM_MAP_RANGE_CHECK(map, start, end);
4217         assert(page_aligned(start));
4218         assert(page_aligned(end));
4219
4220         if (start == end) {
4221                 /* We unwired what the caller asked for: zero pages */
4222                 vm_map_unlock(map);
4223                 return KERN_SUCCESS;
4224         }
4225
4226         if (vm_map_lookup_entry(map, start, &first_entry)) {
4227                 entry = first_entry;
4228                 /*
4229                  * vm_map_clip_start will be done later.
4230                  * We don't want to unnest any nested sub maps here !
4231                  */
4232         }
4233         else {
4234                 if (!user_wire) {
4235                         panic("vm_map_unwire: start not found");
4236                 }
4237                 /*      Start address is not in map. */
4238                 vm_map_unlock(map);
4239                 return(KERN_INVALID_ADDRESS);
4240         }
4241
4242         if (entry->superpage_size) {
4243                 /* superpages are always wired */
4244                 vm_map_unlock(map);
4245                 return KERN_INVALID_ADDRESS;
4246         }
4247
4248         need_wakeup = FALSE;
4249         while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4250                 if (entry->in_transition) {
4251                         /*
4252                          * 1)
4253                          * Another thread is wiring down this entry. Note
4254                          * that if it is not for the other thread we would
4255                          * be unwiring an unwired entry.  This is not
4256                          * permitted.  If we wait, we will be unwiring memory
4257                          * we did not wire.
4258                          *
4259                          * 2)
4260                          * Another thread is unwiring this entry.  We did not
4261                          * have a reference to it, because if we did, this
4262                          * entry will not be getting unwired now.
4263                          */
4264                         if (!user_wire) {
4265                                 /*
4266                                  * XXX FBDP
4267                                  * This could happen:  there could be some
4268                                  * overlapping vslock/vsunlock operations
4269                                  * going on.
4270                                  * We should probably just wait and retry,
4271                                  * but then we have to be careful that this
4272                                  * entry could get "simplified" after
4273                                  * "in_transition" gets unset and before
4274                                  * we re-lookup the entry, so we would
4275                                  * have to re-clip the entry to avoid
4276                                  * re-unwiring what we have already unwired...
4277                                  * See vm_map_wire_nested().
4278                                  *
4279                                  * Or we could just ignore "in_transition"
4280                                  * here and proceed to decrement the wired
4281                                  * count(s) on this entry.  That should be fine
4282                                  * as long as "wired_count" doesn't drop all
4283                                  * the way to 0 (and we should panic if THAT
4284                                  * happens).
4285                                  */
4286                                 panic("vm_map_unwire: in_transition entry");
4287                         }
4288
4289                         entry = entry->vme_next;
4290                         continue;
4291                 }
4292
4293                 if (entry->is_sub_map) {
4294                         vm_map_offset_t sub_start;
4295                         vm_map_offset_t sub_end;
4296                         vm_map_offset_t local_end;
4297                         pmap_t          pmap;
4298
4299                         vm_map_clip_start(map, entry, start);
4300                         vm_map_clip_end(map, entry, end);
4301
4302                         sub_start = entry->offset;
4303                         sub_end = entry->vme_end - entry->vme_start;
4304                         sub_end += entry->offset;
4305                         local_end = entry->vme_end;
4306                         if(map_pmap == NULL) {
4307                                 if(entry->use_pmap) {
4308                                         pmap = entry->object.sub_map->pmap;
4309                                         pmap_addr = sub_start;
4310                                 } else {
4311                                         pmap = map->pmap;
4312                                         pmap_addr = start;
4313                                 }
4314                                 if (entry->wired_count == 0 ||
4315                                     (user_wire && entry->user_wired_count == 0)) {
4316                                         if (!user_wire)
4317                                                 panic("vm_map_unwire: entry is unwired");
4318                                         entry = entry->vme_next;
4319                                         continue;
4320                                 }
4321
4322                                 /*
4323                                  * Check for holes
4324                                  * Holes: Next entry should be contiguous unless
4325                                  * this is the end of the region.
4326                                  */
4327                                 if (((entry->vme_end < end) &&
4328                                      ((entry->vme_next == vm_map_to_entry(map)) ||
4329                                       (entry->vme_next->vme_start
4330                                        > entry->vme_end)))) {
4331                                         if (!user_wire)
4332                                                 panic("vm_map_unwire: non-contiguous region");
4333 /*
4334                                         entry = entry->vme_next;
4335                                         continue;
4336 */
4337                                 }
4338
4339                                 subtract_wire_counts(map, entry, user_wire);
4340
4341                                 if (entry->wired_count != 0) {
4342                                         entry = entry->vme_next;
4343                                         continue;
4344                                 }
4345
4346                                 entry->in_transition = TRUE;
4347                                 tmp_entry = *entry;/* see comment in vm_map_wire() */
4348
4349                                 /*
4350                                  * We can unlock the map now. The in_transition state
4351                                  * guarantees existence of the entry.
4352                                  */
4353                                 vm_map_unlock(map);
4354                                 vm_map_unwire_nested(entry->object.sub_map,
4355                                                      sub_start, sub_end, user_wire, pmap, pmap_addr);
4356                                 vm_map_lock(map);
4357
4358                                 if (last_timestamp+1 != map->timestamp) {
4359                                         /*
4360                                          * Find the entry again.  It could have been
4361                                          * clipped or deleted after we unlocked the map.
4362                                          */
4363                                         if (!vm_map_lookup_entry(map,
4364                                                                  tmp_entry.vme_start,
4365                                                                  &first_entry)) {
4366                                                 if (!user_wire)
4367                                                         panic("vm_map_unwire: re-lookup failed");
4368                                                 entry = first_entry->vme_next;
4369                                         } else
4370                                                 entry = first_entry;
4371                                 }
4372                                 last_timestamp = map->timestamp;
4373
4374                                 /*
4375                                  * clear transition bit for all constituent entries
4376                                  * that were in the original entry (saved in
4377                                  * tmp_entry).  Also check for waiters.
4378                                  */
4379                                 while ((entry != vm_map_to_entry(map)) &&
4380                                        (entry->vme_start < tmp_entry.vme_end)) {
4381                                         assert(entry->in_transition);
4382                                         entry->in_transition = FALSE;
4383                                         if (entry->needs_wakeup) {
4384                                                 entry->needs_wakeup = FALSE;
4385                                                 need_wakeup = TRUE;
4386                                         }
4387                                         entry = entry->vme_next;
4388                                 }
4389                                 continue;
4390                         } else {
4391                                 vm_map_unlock(map);
4392                                 vm_map_unwire_nested(entry->object.sub_map,
4393                                                      sub_start, sub_end, user_wire, map_pmap,
4394                                                      pmap_addr);
4395                                 vm_map_lock(map);
4396
4397                                 if (last_timestamp+1 != map->timestamp) {
4398                                         /*
4399                                          * Find the entry again.  It could have been
4400                                          * clipped or deleted after we unlocked the map.
4401                                          */
4402                                         if (!vm_map_lookup_entry(map,
4403                                                                  tmp_entry.vme_start,
4404                                                                  &first_entry)) {
4405                                                 if (!user_wire)
4406                                                         panic("vm_map_unwire: re-lookup failed");
4407                                                 entry = first_entry->vme_next;
4408                                         } else
4409                                                 entry = first_entry;
4410                                 }
4411                                 last_timestamp = map->timestamp;
4412                         }
4413                 }
4414
4415
4416                 if ((entry->wired_count == 0) ||
4417                     (user_wire && entry->user_wired_count == 0)) {
4418                         if (!user_wire)
4419                                 panic("vm_map_unwire: entry is unwired");
4420
4421                         entry = entry->vme_next;
4422                         continue;
4423                 }
4424
4425                 assert(entry->wired_count > 0 &&
4426                        (!user_wire || entry->user_wired_count > 0));
4427
4428                 vm_map_clip_start(map, entry, start);
4429                 vm_map_clip_end(map, entry, end);
4430
4431                 /*
4432                  * Check for holes
4433                  * Holes: Next entry should be contiguous unless
4434                  *        this is the end of the region.
4435                  */
4436                 if (((entry->vme_end < end) &&
4437                      ((entry->vme_next == vm_map_to_entry(map)) ||
4438                       (entry->vme_next->vme_start > entry->vme_end)))) {
4439
4440                         if (!user_wire)
4441                                 panic("vm_map_unwire: non-contiguous region");
4442                         entry = entry->vme_next;
4443                         continue;
4444                 }
4445
4446                 subtract_wire_counts(map, entry, user_wire);
4447
4448                 if (entry->wired_count != 0) {
4449                         entry = entry->vme_next;
4450                         continue;
4451                 }
4452
4453                 if(entry->zero_wired_pages) {
4454                         entry->zero_wired_pages = FALSE;
4455                 }
4456
4457                 entry->in_transition = TRUE;
4458                 tmp_entry = *entry;     /* see comment in vm_map_wire() */
4459
4460                 /*
4461                  * We can unlock the map now. The in_transition state
4462                  * guarantees existance of the entry.
4463                  */
4464                 vm_map_unlock(map);
4465                 if(map_pmap) {
4466                         vm_fault_unwire(map,
4467                                         &tmp_entry, FALSE, map_pmap, pmap_addr);
4468                 } else {
4469                         vm_fault_unwire(map,
4470                                         &tmp_entry, FALSE, map->pmap,
4471                                         tmp_entry.vme_start);
4472                 }
4473                 vm_map_lock(map);
4474
4475                 if (last_timestamp+1 != map->timestamp) {
4476                         /*
4477                          * Find the entry again.  It could have been clipped
4478                          * or deleted after we unlocked the map.
4479                          */
4480                         if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4481                                                  &first_entry)) {
4482                                 if (!user_wire)
4483                                         panic("vm_map_unwire: re-lookup failed");
4484                                 entry = first_entry->vme_next;
4485                         } else
4486                                 entry = first_entry;
4487                 }
4488                 last_timestamp = map->timestamp;
4489
4490                 /*
4491                  * clear transition bit for all constituent entries that
4492                  * were in the original entry (saved in tmp_entry).  Also
4493                  * check for waiters.
4494                  */
4495                 while ((entry != vm_map_to_entry(map)) &&
4496                        (entry->vme_start < tmp_entry.vme_end)) {
4497                         assert(entry->in_transition);
4498                         entry->in_transition = FALSE;
4499                         if (entry->needs_wakeup) {
4500                                 entry->needs_wakeup = FALSE;
4501                                 need_wakeup = TRUE;
4502                         }
4503                         entry = entry->vme_next;
4504                 }
4505         }
4506
4507         /*
4508          * We might have fragmented the address space when we wired this
4509          * range of addresses.  Attempt to re-coalesce these VM map entries
4510          * with their neighbors now that they're no longer wired.
4511          * Under some circumstances, address space fragmentation can
4512          * prevent VM object shadow chain collapsing, which can cause
4513          * swap space leaks.
4514          */
4515         vm_map_simplify_range(map, start, end);
4516
4517         vm_map_unlock(map);
4518         /*
4519          * wake up anybody waiting on entries that we have unwired.
4520          */
4521         if (need_wakeup)
4522                 vm_map_entry_wakeup(map);
4523         return(KERN_SUCCESS);
4524
4525 }
4526
4527 kern_return_t
4528 vm_map_unwire(
4529         register vm_map_t       map,
4530         register vm_map_offset_t        start,
4531         register vm_map_offset_t        end,
4532         boolean_t               user_wire)
4533 {
4534         return vm_map_unwire_nested(map, start, end,
4535                                     user_wire, (pmap_t)NULL, 0);
4536 }
4537
4538
4539 /*
4540  *      vm_map_entry_delete:    [ internal use only ]
4541  *
4542  *      Deallocate the given entry from the target map.
4543  */
4544 static void
4545 vm_map_entry_delete(
4546         register vm_map_t       map,
4547         register vm_map_entry_t entry)
4548 {
4549         register vm_map_offset_t        s, e;
4550         register vm_object_t    object;
4551         register vm_map_t       submap;
4552
4553         s = entry->vme_start;
4554         e = entry->vme_end;
4555         assert(page_aligned(s));
4556         assert(page_aligned(e));
4557         assert(entry->wired_count == 0);
4558         assert(entry->user_wired_count == 0);
4559         assert(!entry->permanent);
4560
4561         if (entry->is_sub_map) {
4562                 object = NULL;
4563                 submap = entry->object.sub_map;
4564         } else {
4565                 submap = NULL;
4566                 object = entry->object.vm_object;
4567         }
4568
4569         vm_map_store_entry_unlink(map, entry);
4570         map->size -= e - s;
4571
4572         vm_map_entry_dispose(map, entry);
4573
4574         vm_map_unlock(map);
4575         /*
4576          *      Deallocate the object only after removing all
4577          *      pmap entries pointing to its pages.
4578          */
4579         if (submap)
4580                 vm_map_deallocate(submap);
4581         else
4582                 vm_object_deallocate(object);
4583
4584 }
4585
4586 void
4587 vm_map_submap_pmap_clean(
4588         vm_map_t        map,
4589         vm_map_offset_t start,
4590         vm_map_offset_t end,
4591         vm_map_t        sub_map,
4592         vm_map_offset_t offset)
4593 {
4594         vm_map_offset_t submap_start;
4595         vm_map_offset_t submap_end;
4596         vm_map_size_t   remove_size;
4597         vm_map_entry_t  entry;
4598
4599         submap_end = offset + (end - start);
4600         submap_start = offset;
4601
4602         vm_map_lock_read(sub_map);
4603         if(vm_map_lookup_entry(sub_map, offset, &entry)) {
4604
4605                 remove_size = (entry->vme_end - entry->vme_start);
4606                 if(offset > entry->vme_start)
4607                         remove_size -= offset - entry->vme_start;
4608
4609
4610                 if(submap_end < entry->vme_end) {
4611                         remove_size -=
4612                                 entry->vme_end - submap_end;
4613                 }
4614                 if(entry->is_sub_map) {
4615                         vm_map_submap_pmap_clean(
4616                                 sub_map,
4617                                 start,
4618                                 start + remove_size,
4619                                 entry->object.sub_map,
4620                                 entry->offset);
4621                 } else {
4622
4623                         if((map->mapped) && (map->ref_count)
4624                            && (entry->object.vm_object != NULL)) {
4625                                 vm_object_pmap_protect(
4626                                         entry->object.vm_object,
4627                                         entry->offset+(offset-entry->vme_start),
4628                                         remove_size,
4629                                         PMAP_NULL,
4630                                         entry->vme_start,
4631                                         VM_PROT_NONE);
4632                         } else {
4633                                 pmap_remove(map->pmap,
4634                                             (addr64_t)start,
4635                                             (addr64_t)(start + remove_size));
4636                         }
4637                 }
4638         }
4639
4640         entry = entry->vme_next;
4641
4642         while((entry != vm_map_to_entry(sub_map))
4643               && (entry->vme_start < submap_end)) {
4644                 remove_size = (entry->vme_end - entry->vme_start);
4645                 if(submap_end < entry->vme_end) {
4646                         remove_size -= entry->vme_end - submap_end;
4647                 }
4648                 if(entry->is_sub_map) {
4649                         vm_map_submap_pmap_clean(
4650                                 sub_map,
4651                                 (start + entry->vme_start) - offset,
4652                                 ((start + entry->vme_start) - offset) + remove_size,
4653                                 entry->object.sub_map,
4654                                 entry->offset);
4655                 } else {
4656                         if((map->mapped) && (map->ref_count)
4657                            && (entry->object.vm_object != NULL)) {
4658                                 vm_object_pmap_protect(
4659                                         entry->object.vm_object,
4660                                         entry->offset,
4661                                         remove_size,
4662                                         PMAP_NULL,
4663                                         entry->vme_start,
4664                                         VM_PROT_NONE);
4665                         } else {
4666                                 pmap_remove(map->pmap,
4667                                             (addr64_t)((start + entry->vme_start)
4668                                                        - offset),
4669                                             (addr64_t)(((start + entry->vme_start)
4670                                                         - offset) + remove_size));
4671                         }
4672                 }
4673                 entry = entry->vme_next;
4674         }
4675         vm_map_unlock_read(sub_map);
4676         return;
4677 }
4678
4679 /*
4680  *      vm_map_delete:  [ internal use only ]
4681  *
4682  *      Deallocates the given address range from the target map.
4683  *      Removes all user wirings. Unwires one kernel wiring if
4684  *      VM_MAP_REMOVE_KUNWIRE is set.  Waits for kernel wirings to go
4685  *      away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set.  Sleeps
4686  *      interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
4687  *
4688  *      This routine is called with map locked and leaves map locked.
4689  */
4690 static kern_return_t
4691 vm_map_delete(
4692         vm_map_t                map,
4693         vm_map_offset_t         start,
4694         vm_map_offset_t         end,
4695         int                     flags,
4696         vm_map_t                zap_map)
4697 {
4698         vm_map_entry_t          entry, next;
4699         struct   vm_map_entry   *first_entry, tmp_entry;
4700         register vm_map_offset_t s;
4701         register vm_object_t    object;
4702         boolean_t               need_wakeup;
4703         unsigned int            last_timestamp = ~0; /* unlikely value */
4704         int                     interruptible;
4705
4706         interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
4707                 THREAD_ABORTSAFE : THREAD_UNINT;
4708
4709         /*
4710          * All our DMA I/O operations in IOKit are currently done by
4711          * wiring through the map entries of the task requesting the I/O.
4712          * Because of this, we must always wait for kernel wirings
4713          * to go away on the entries before deleting them.
4714          *
4715          * Any caller who wants to actually remove a kernel wiring
4716          * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
4717          * properly remove one wiring instead of blasting through
4718          * them all.
4719          */
4720         flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
4721
4722         while(1) {
4723                 /*
4724                  *      Find the start of the region, and clip it
4725                  */
4726                 if (vm_map_lookup_entry(map, start, &first_entry)) {
4727                         entry = first_entry;
4728                         if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */                           start = SUPERPAGE_ROUND_DOWN(start);
4729                                 start = SUPERPAGE_ROUND_DOWN(start);
4730                                 continue;
4731                         }
4732                         if (start == entry->vme_start) {
4733                                 /*
4734                                  * No need to clip.  We don't want to cause
4735                                  * any unnecessary unnesting in this case...
4736                                  */
4737                         } else {
4738                                 vm_map_clip_start(map, entry, start);
4739                         }
4740
4741                         /*
4742                          *      Fix the lookup hint now, rather than each
4743                          *      time through the loop.
4744                          */
4745                         SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4746                 } else {
4747                         entry = first_entry->vme_next;
4748                 }
4749                 break;
4750         }
4751         if (entry->superpage_size)
4752                 end = SUPERPAGE_ROUND_UP(end);
4753
4754         need_wakeup = FALSE;
4755         /*
4756          *      Step through all entries in this region
4757          */
4758         s = entry->vme_start;
4759         while ((entry != vm_map_to_entry(map)) && (s < end)) {
4760                 /*
4761                  * At this point, we have deleted all the memory entries
4762                  * between "start" and "s".  We still need to delete
4763                  * all memory entries between "s" and "end".
4764                  * While we were blocked and the map was unlocked, some
4765                  * new memory entries could have been re-allocated between
4766                  * "start" and "s" and we don't want to mess with those.
4767                  * Some of those entries could even have been re-assembled
4768                  * with an entry after "s" (in vm_map_simplify_entry()), so
4769                  * we may have to vm_map_clip_start() again.
4770                  */
4771
4772                 if (entry->vme_start >= s) {
4773                         /*
4774                          * This entry starts on or after "s"
4775                          * so no need to clip its start.
4776                          */
4777                 } else {
4778                         /*
4779                          * This entry has been re-assembled by a
4780                          * vm_map_simplify_entry().  We need to
4781                          * re-clip its start.
4782                          */
4783                         vm_map_clip_start(map, entry, s);
4784                 }
4785                 if (entry->vme_end <= end) {
4786                         /*
4787                          * This entry is going away completely, so no need
4788                          * to clip and possibly cause an unnecessary unnesting.
4789                          */
4790                 } else {
4791                         vm_map_clip_end(map, entry, end);
4792                 }
4793
4794                 if (entry->permanent) {
4795                         panic("attempt to remove permanent VM map entry "
4796                               "%p [0x%llx:0x%llx]\n",
4797                               entry, (uint64_t) s, (uint64_t) end);
4798                 }
4799
4800
4801                 if (entry->in_transition) {
4802                         wait_result_t wait_result;
4803
4804                         /*
4805                          * Another thread is wiring/unwiring this entry.
4806                          * Let the other thread know we are waiting.
4807                          */
4808                         assert(s == entry->vme_start);
4809                         entry->needs_wakeup = TRUE;
4810
4811                         /*
4812                          * wake up anybody waiting on entries that we have
4813                          * already unwired/deleted.
4814                          */
4815                         if (need_wakeup) {
4816                                 vm_map_entry_wakeup(map);
4817                                 need_wakeup = FALSE;
4818                         }
4819
4820                         wait_result = vm_map_entry_wait(map, interruptible);
4821
4822                         if (interruptible &&
4823                             wait_result == THREAD_INTERRUPTED) {
4824                                 /*
4825                                  * We do not clear the needs_wakeup flag,
4826                                  * since we cannot tell if we were the only one.
4827                                  */
4828                                 vm_map_unlock(map);
4829                                 return KERN_ABORTED;
4830                         }
4831
4832                         /*
4833                          * The entry could have been clipped or it
4834                          * may not exist anymore.  Look it up again.
4835                          */
4836                         if (!vm_map_lookup_entry(map, s, &first_entry)) {
4837                                 assert((map != kernel_map) &&
4838                                        (!entry->is_sub_map));
4839                                 /*
4840                                  * User: use the next entry
4841                                  */
4842                                 entry = first_entry->vme_next;
4843                                 s = entry->vme_start;
4844                         } else {
4845                                 entry = first_entry;
4846                                 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4847                         }
4848                         last_timestamp = map->timestamp;
4849                         continue;
4850                 } /* end in_transition */
4851
4852                 if (entry->wired_count) {
4853                         boolean_t       user_wire;
4854
4855                         user_wire = entry->user_wired_count > 0;
4856
4857                         /*
4858                          *      Remove a kernel wiring if requested
4859                          */
4860                         if (flags & VM_MAP_REMOVE_KUNWIRE) {
4861                                 entry->wired_count--;
4862                         }
4863
4864                         /*
4865                          *      Remove all user wirings for proper accounting
4866                          */
4867                         if (entry->user_wired_count > 0) {
4868                                 while (entry->user_wired_count)
4869                                         subtract_wire_counts(map, entry, user_wire);
4870                         }
4871
4872                         if (entry->wired_count != 0) {
4873                                 assert(map != kernel_map);
4874                                 /*
4875                                  * Cannot continue.  Typical case is when
4876                                  * a user thread has physical io pending on
4877                                  * on this page.  Either wait for the
4878                                  * kernel wiring to go away or return an
4879                                  * error.
4880                                  */
4881                                 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
4882                                         wait_result_t wait_result;
4883
4884                                         assert(s == entry->vme_start);
4885                                         entry->needs_wakeup = TRUE;
4886                                         wait_result = vm_map_entry_wait(map,
4887                                                                         interruptible);
4888
4889                                         if (interruptible &&
4890                                             wait_result == THREAD_INTERRUPTED) {
4891                                                 /*
4892                                                  * We do not clear the
4893                                                  * needs_wakeup flag, since we
4894                                                  * cannot tell if we were the
4895                                                  * only one.
4896                                                  */
4897                                                 vm_map_unlock(map);
4898                                                 return KERN_ABORTED;
4899                                         }
4900
4901                                         /*
4902                                          * The entry could have been clipped or
4903                                          * it may not exist anymore.  Look it
4904                                          * up again.
4905                                          */
4906                                         if (!vm_map_lookup_entry(map, s,
4907                                                                  &first_entry)) {
4908                                                 assert(map != kernel_map);
4909                                                 /*
4910                                                  * User: use the next entry
4911                                                  */
4912                                                 entry = first_entry->vme_next;
4913                                                 s = entry->vme_start;
4914                                         } else {
4915                                                 entry = first_entry;
4916                                                 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4917                                         }
4918                                         last_timestamp = map->timestamp;
4919                                         continue;
4920                                 }
4921                                 else {
4922                                         return KERN_FAILURE;
4923                                 }
4924                         }
4925
4926                         entry->in_transition = TRUE;
4927                         /*
4928                          * copy current entry.  see comment in vm_map_wire()
4929                          */
4930                         tmp_entry = *entry;
4931                         assert(s == entry->vme_start);
4932
4933                         /*
4934                          * We can unlock the map now. The in_transition
4935                          * state guarentees existance of the entry.
4936                          */
4937                         vm_map_unlock(map);
4938
4939                         if (tmp_entry.is_sub_map) {
4940                                 vm_map_t sub_map;
4941                                 vm_map_offset_t sub_start, sub_end;
4942                                 pmap_t pmap;
4943                                 vm_map_offset_t pmap_addr;
4944
4945
4946                                 sub_map = tmp_entry.object.sub_map;
4947                                 sub_start = tmp_entry.offset;
4948                                 sub_end = sub_start + (tmp_entry.vme_end -
4949                                                        tmp_entry.vme_start);
4950                                 if (tmp_entry.use_pmap) {
4951                                         pmap = sub_map->pmap;
4952                                         pmap_addr = tmp_entry.vme_start;
4953                                 } else {
4954                                         pmap = map->pmap;
4955                                         pmap_addr = tmp_entry.vme_start;
4956                                 }
4957                                 (void) vm_map_unwire_nested(sub_map,
4958                                                             sub_start, sub_end,
4959                                                             user_wire,
4960                                                             pmap, pmap_addr);
4961                         } else {
4962
4963                                 vm_fault_unwire(map, &tmp_entry,
4964                                                 tmp_entry.object.vm_object == kernel_object,
4965                                                 map->pmap, tmp_entry.vme_start);
4966                         }
4967
4968                         vm_map_lock(map);
4969
4970                         if (last_timestamp+1 != map->timestamp) {
4971                                 /*
4972                                  * Find the entry again.  It could have
4973                                  * been clipped after we unlocked the map.
4974                                  */
4975                                 if (!vm_map_lookup_entry(map, s, &first_entry)){
4976                                         assert((map != kernel_map) &&
4977                                                (!entry->is_sub_map));
4978                                         first_entry = first_entry->vme_next;
4979                                         s = first_entry->vme_start;
4980                                 } else {
4981                                         SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4982                                 }
4983                         } else {
4984                                 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4985                                 first_entry = entry;
4986                         }
4987
4988                         last_timestamp = map->timestamp;
4989
4990                         entry = first_entry;
4991                         while ((entry != vm_map_to_entry(map)) &&
4992                                (entry->vme_start < tmp_entry.vme_end)) {
4993                                 assert(entry->in_transition);
4994                                 entry->in_transition = FALSE;
4995                                 if (entry->needs_wakeup) {
4996                                         entry->needs_wakeup = FALSE;
4997                                         need_wakeup = TRUE;
4998                                 }
4999                                 entry = entry->vme_next;
5000                         }
5001                         /*
5002                          * We have unwired the entry(s).  Go back and
5003                          * delete them.
5004                          */
5005                         entry = first_entry;
5006                         continue;
5007                 }
5008
5009                 /* entry is unwired */
5010                 assert(entry->wired_count == 0);
5011                 assert(entry->user_wired_count == 0);
5012
5013                 assert(s == entry->vme_start);
5014
5015                 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
5016                         /*
5017                          * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
5018                          * vm_map_delete(), some map entries might have been
5019                          * transferred to a "zap_map", which doesn't have a
5020                          * pmap.  The original pmap has already been flushed
5021                          * in the vm_map_delete() call targeting the original
5022                          * map, but when we get to destroying the "zap_map",
5023                          * we don't have any pmap to flush, so let's just skip
5024                          * all this.
5025                          */
5026                 } else if (entry->is_sub_map) {
5027                         if (entry->use_pmap) {
5028 #ifndef NO_NESTED_PMAP
5029                                 pmap_unnest(map->pmap,
5030                                             (addr64_t)entry->vme_start,
5031                                             entry->vme_end - entry->vme_start);
5032 #endif  /* NO_NESTED_PMAP */
5033                                 if ((map->mapped) && (map->ref_count)) {
5034                                         /* clean up parent map/maps */
5035                                         vm_map_submap_pmap_clean(
5036                                                 map, entry->vme_start,
5037                                                 entry->vme_end,
5038                                                 entry->object.sub_map,
5039                                                 entry->offset);
5040                                 }
5041                         } else {
5042                                 vm_map_submap_pmap_clean(
5043                                         map, entry->vme_start, entry->vme_end,
5044                                         entry->object.sub_map,
5045                                         entry->offset);
5046                         }
5047                 } else if (entry->object.vm_object != kernel_object) {
5048                         object = entry->object.vm_object;
5049                         if((map->mapped) && (map->ref_count)) {
5050                                 vm_object_pmap_protect(
5051                                         object, entry->offset,
5052                                         entry->vme_end - entry->vme_start,
5053                                         PMAP_NULL,
5054                                         entry->vme_start,
5055                                         VM_PROT_NONE);
5056                         } else {
5057                                 pmap_remove(map->pmap,
5058                                             (addr64_t)entry->vme_start,
5059                                             (addr64_t)entry->vme_end);
5060                         }
5061                 }
5062
5063                 /*
5064                  * All pmap mappings for this map entry must have been
5065                  * cleared by now.
5066                  */
5067                 assert(vm_map_pmap_is_empty(map,
5068                                             entry->vme_start,
5069                                             entry->vme_end));
5070
5071                 next = entry->vme_next;
5072                 s = next->vme_start;
5073                 last_timestamp = map->timestamp;
5074
5075                 if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
5076                     zap_map != VM_MAP_NULL) {
5077                         vm_map_size_t entry_size;
5078                         /*
5079                          * The caller wants to save the affected VM map entries
5080                          * into the "zap_map".  The caller will take care of
5081                          * these entries.
5082                          */
5083                         /* unlink the entry from "map" ... */
5084                         vm_map_store_entry_unlink(map, entry);
5085                         /* ... and add it to the end of the "zap_map" */
5086                         vm_map_store_entry_link(zap_map,
5087                                           vm_map_last_entry(zap_map),
5088                                           entry);
5089                         entry_size = entry->vme_end - entry->vme_start;
5090                         map->size -= entry_size;
5091                         zap_map->size += entry_size;
5092                         /* we didn't unlock the map, so no timestamp increase */
5093                         last_timestamp--;
5094                 } else {
5095                         vm_map_entry_delete(map, entry);
5096                         /* vm_map_entry_delete unlocks the map */
5097                         vm_map_lock(map);
5098                 }
5099
5100                 entry = next;
5101
5102                 if(entry == vm_map_to_entry(map)) {
5103                         break;
5104                 }
5105                 if (last_timestamp+1 != map->timestamp) {
5106                         /*
5107                          * we are responsible for deleting everything
5108                          * from the give space, if someone has interfered
5109                          * we pick up where we left off, back fills should
5110                          * be all right for anyone except map_delete and
5111                          * we have to assume that the task has been fully
5112                          * disabled before we get here
5113                          */
5114                         if (!vm_map_lookup_entry(map, s, &entry)){
5115                                 entry = entry->vme_next;
5116                                 s = entry->vme_start;
5117                         } else {
5118                                 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5119                         }
5120                         /*
5121                          * others can not only allocate behind us, we can
5122                          * also see coalesce while we don't have the map lock
5123                          */
5124                         if(entry == vm_map_to_entry(map)) {
5125                                 break;
5126                         }
5127                 }
5128                 last_timestamp = map->timestamp;
5129         }
5130
5131         if (map->wait_for_space)
5132                 thread_wakeup((event_t) map);
5133         /*
5134          * wake up anybody waiting on entries that we have already deleted.
5135          */
5136         if (need_wakeup)
5137                 vm_map_entry_wakeup(map);
5138
5139         return KERN_SUCCESS;
5140 }
5141
5142 /*
5143  *      vm_map_remove:
5144  *
5145  *      Remove the given address range from the target map.
5146  *      This is the exported form of vm_map_delete.
5147  */
5148 kern_return_t
5149 vm_map_remove(
5150         register vm_map_t       map,
5151         register vm_map_offset_t        start,
5152         register vm_map_offset_t        end,
5153         register boolean_t      flags)
5154 {
5155         register kern_return_t  result;
5156
5157         vm_map_lock(map);
5158         VM_MAP_RANGE_CHECK(map, start, end);
5159         result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
5160         vm_map_unlock(map);
5161
5162         return(result);
5163 }
5164
5165
5166 /*
5167  *      Routine:        vm_map_copy_discard
5168  *
5169  *      Description:
5170  *              Dispose of a map copy object (returned by
5171  *              vm_map_copyin).
5172  */
5173 void
5174 vm_map_copy_discard(
5175         vm_map_copy_t   copy)
5176 {
5177         if (copy == VM_MAP_COPY_NULL)
5178                 return;
5179
5180         switch (copy->type) {
5181         case VM_MAP_COPY_ENTRY_LIST:
5182                 while (vm_map_copy_first_entry(copy) !=
5183                        vm_map_copy_to_entry(copy)) {
5184                         vm_map_entry_t  entry = vm_map_copy_first_entry(copy);
5185
5186                         vm_map_copy_entry_unlink(copy, entry);
5187                         vm_object_deallocate(entry->object.vm_object);
5188                         vm_map_copy_entry_dispose(copy, entry);
5189                 }
5190                 break;
5191         case VM_MAP_COPY_OBJECT:
5192                 vm_object_deallocate(copy->cpy_object);
5193                 break;
5194         case VM_MAP_COPY_KERNEL_BUFFER:
5195
5196                 /*
5197                  * The vm_map_copy_t and possibly the data buffer were
5198                  * allocated by a single call to kalloc(), i.e. the
5199                  * vm_map_copy_t was not allocated out of the zone.
5200                  */
5201                 kfree(copy, copy->cpy_kalloc_size);
5202                 return;
5203         }
5204         zfree(vm_map_copy_zone, copy);
5205 }
5206
5207 /*
5208  *      Routine:        vm_map_copy_copy
5209  *
5210  *      Description:
5211  *                      Move the information in a map copy object to
5212  *                      a new map copy object, leaving the old one
5213  *                      empty.
5214  *
5215  *                      This is used by kernel routines that need
5216  *                      to look at out-of-line data (in copyin form)
5217  *                      before deciding whether to return SUCCESS.
5218  *                      If the routine returns FAILURE, the original
5219  *                      copy object will be deallocated; therefore,
5220  *                      these routines must make a copy of the copy
5221  *                      object and leave the original empty so that
5222  *                      deallocation will not fail.
5223  */
5224 vm_map_copy_t
5225 vm_map_copy_copy(
5226         vm_map_copy_t   copy)
5227 {
5228         vm_map_copy_t   new_copy;
5229
5230         if (copy == VM_MAP_COPY_NULL)
5231                 return VM_MAP_COPY_NULL;
5232
5233         /*
5234          * Allocate a new copy object, and copy the information
5235          * from the old one into it.
5236          */
5237
5238         new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
5239         *new_copy = *copy;
5240
5241         if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
5242                 /*
5243                  * The links in the entry chain must be
5244                  * changed to point to the new copy object.
5245                  */
5246                 vm_map_copy_first_entry(copy)->vme_prev
5247                         = vm_map_copy_to_entry(new_copy);
5248                 vm_map_copy_last_entry(copy)->vme_next
5249                         = vm_map_copy_to_entry(new_copy);
5250         }
5251
5252         /*
5253          * Change the old copy object into one that contains
5254          * nothing to be deallocated.
5255          */
5256         copy->type = VM_MAP_COPY_OBJECT;
5257         copy->cpy_object = VM_OBJECT_NULL;
5258
5259         /*
5260          * Return the new object.
5261          */
5262         return new_copy;
5263 }
5264
5265 static kern_return_t
5266 vm_map_overwrite_submap_recurse(
5267         vm_map_t        dst_map,
5268         vm_map_offset_t dst_addr,
5269         vm_map_size_t   dst_size)
5270 {
5271         vm_map_offset_t dst_end;
5272         vm_map_entry_t  tmp_entry;
5273         vm_map_entry_t  entry;
5274         kern_return_t   result;
5275         boolean_t       encountered_sub_map = FALSE;
5276
5277
5278
5279         /*
5280          *      Verify that the destination is all writeable
5281          *      initially.  We have to trunc the destination
5282          *      address and round the copy size or we'll end up
5283          *      splitting entries in strange ways.
5284          */
5285
5286         dst_end = vm_map_round_page(dst_addr + dst_size);
5287         vm_map_lock(dst_map);
5288
5289 start_pass_1:
5290         if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5291                 vm_map_unlock(dst_map);
5292                 return(KERN_INVALID_ADDRESS);
5293         }
5294
5295         vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
5296         assert(!tmp_entry->use_pmap); /* clipping did unnest if needed */
5297
5298         for (entry = tmp_entry;;) {
5299                 vm_map_entry_t  next;
5300
5301                 next = entry->vme_next;
5302                 while(entry->is_sub_map) {
5303                         vm_map_offset_t sub_start;
5304                         vm_map_offset_t sub_end;
5305                         vm_map_offset_t local_end;
5306
5307                         if (entry->in_transition) {
5308                                 /*
5309                                  * Say that we are waiting, and wait for entry.
5310                                  */
5311                                 entry->needs_wakeup = TRUE;
5312                                 vm_map_entry_wait(dst_map, THREAD_UNINT);
5313
5314                                 goto start_pass_1;
5315                         }
5316
5317                         encountered_sub_map = TRUE;
5318                         sub_start = entry->offset;
5319
5320                         if(entry->vme_end < dst_end)
5321                                 sub_end = entry->vme_end;
5322                         else
5323                                 sub_end = dst_end;
5324                         sub_end -= entry->vme_start;
5325                         sub_end += entry->offset;
5326                         local_end = entry->vme_end;
5327                         vm_map_unlock(dst_map);
5328
5329                         result = vm_map_overwrite_submap_recurse(
5330                                 entry->object.sub_map,
5331                                 sub_start,
5332                                 sub_end - sub_start);
5333
5334                         if(result != KERN_SUCCESS)
5335                                 return result;
5336                         if (dst_end <= entry->vme_end)
5337                                 return KERN_SUCCESS;
5338                         vm_map_lock(dst_map);
5339                         if(!vm_map_lookup_entry(dst_map, local_end,
5340                                                 &tmp_entry)) {
5341                                 vm_map_unlock(dst_map);
5342                                 return(KERN_INVALID_ADDRESS);
5343                         }
5344                         entry = tmp_entry;
5345                         next = entry->vme_next;
5346                 }
5347
5348                 if ( ! (entry->protection & VM_PROT_WRITE)) {
5349                         vm_map_unlock(dst_map);
5350                         return(KERN_PROTECTION_FAILURE);
5351                 }
5352
5353                 /*
5354                  *      If the entry is in transition, we must wait
5355                  *      for it to exit that state.  Anything could happen
5356                  *      when we unlock the map, so start over.
5357                  */
5358                 if (entry->in_transition) {
5359
5360                         /*
5361                          * Say that we are waiting, and wait for entry.
5362                          */
5363                         entry->needs_wakeup = TRUE;
5364                         vm_map_entry_wait(dst_map, THREAD_UNINT);
5365
5366                         goto start_pass_1;
5367                 }
5368
5369 /*
5370  *              our range is contained completely within this map entry
5371  */
5372                 if (dst_end <= entry->vme_end) {
5373                         vm_map_unlock(dst_map);
5374                         return KERN_SUCCESS;
5375                 }
5376 /*
5377  *              check that range specified is contiguous region
5378  */
5379                 if ((next == vm_map_to_entry(dst_map)) ||
5380                     (next->vme_start != entry->vme_end)) {
5381                         vm_map_unlock(dst_map);
5382                         return(KERN_INVALID_ADDRESS);
5383                 }
5384
5385                 /*
5386                  *      Check for permanent objects in the destination.
5387                  */
5388                 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5389                     ((!entry->object.vm_object->internal) ||
5390                      (entry->object.vm_object->true_share))) {
5391                         if(encountered_sub_map) {
5392                                 vm_map_unlock(dst_map);
5393                                 return(KERN_FAILURE);
5394                         }
5395                 }
5396
5397
5398                 entry = next;
5399         }/* for */
5400         vm_map_unlock(dst_map);
5401         return(KERN_SUCCESS);
5402 }
5403
5404 /*
5405  *      Routine:        vm_map_copy_overwrite
5406  *
5407  *      Description:
5408  *              Copy the memory described by the map copy
5409  *              object (copy; returned by vm_map_copyin) onto
5410  *              the specified destination region (dst_map, dst_addr).
5411  *              The destination must be writeable.
5412  *
5413  *              Unlike vm_map_copyout, this routine actually
5414  *              writes over previously-mapped memory.  If the
5415  *              previous mapping was to a permanent (user-supplied)
5416  *              memory object, it is preserved.
5417  *
5418  *              The attributes (protection and inheritance) of the
5419  *              destination region are preserved.
5420  *
5421  *              If successful, consumes the copy object.
5422  *              Otherwise, the caller is responsible for it.
5423  *
5424  *      Implementation notes:
5425  *              To overwrite aligned temporary virtual memory, it is
5426  *              sufficient to remove the previous mapping and insert
5427  *              the new copy.  This replacement is done either on
5428  *              the whole region (if no permanent virtual memory
5429  *              objects are embedded in the destination region) or
5430  *              in individual map entries.
5431  *
5432  *              To overwrite permanent virtual memory, it is necessary
5433  *              to copy each page, as the external memory management
5434  *              interface currently does not provide any optimizations.
5435  *
5436  *              Unaligned memory also has to be copied.  It is possible
5437  *              to use 'vm_trickery' to copy the aligned data.  This is
5438  *              not done but not hard to implement.
5439  *
5440  *              Once a page of permanent memory has been overwritten,
5441  *              it is impossible to interrupt this function; otherwise,
5442  *              the call would be neither atomic nor location-independent.
5443  *              The kernel-state portion of a user thread must be
5444  *              interruptible.
5445  *
5446  *              It may be expensive to forward all requests that might
5447  *              overwrite permanent memory (vm_write, vm_copy) to
5448  *              uninterruptible kernel threads.  This routine may be
5449  *              called by interruptible threads; however, success is
5450  *              not guaranteed -- if the request cannot be performed
5451  *              atomically and interruptibly, an error indication is
5452  *              returned.
5453  */
5454
5455 static kern_return_t
5456 vm_map_copy_overwrite_nested(
5457         vm_map_t                dst_map,
5458         vm_map_address_t        dst_addr,
5459         vm_map_copy_t           copy,
5460         boolean_t               interruptible,
5461         pmap_t                  pmap,
5462         boolean_t               discard_on_success)
5463 {
5464         vm_map_offset_t         dst_end;
5465         vm_map_entry_t          tmp_entry;
5466         vm_map_entry_t          entry;
5467         kern_return_t           kr;
5468         boolean_t               aligned = TRUE;
5469         boolean_t               contains_permanent_objects = FALSE;
5470         boolean_t               encountered_sub_map = FALSE;
5471         vm_map_offset_t         base_addr;
5472         vm_map_size_t           copy_size;
5473         vm_map_size_t           total_size;
5474
5475
5476         /*
5477          *      Check for null copy object.
5478          */
5479
5480         if (copy == VM_MAP_COPY_NULL)
5481                 return(KERN_SUCCESS);
5482
5483         /*
5484          *      Check for special kernel buffer allocated
5485          *      by new_ipc_kmsg_copyin.
5486          */
5487
5488         if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
5489                 return(vm_map_copyout_kernel_buffer(
5490                                dst_map, &dst_addr,
5491                                copy, TRUE));
5492         }
5493
5494         /*
5495          *      Only works for entry lists at the moment.  Will
5496          *      support page lists later.
5497          */
5498
5499         assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
5500
5501         if (copy->size == 0) {
5502                 if (discard_on_success)
5503                         vm_map_copy_discard(copy);
5504                 return(KERN_SUCCESS);
5505         }
5506
5507         /*
5508          *      Verify that the destination is all writeable
5509          *      initially.  We have to trunc the destination
5510          *      address and round the copy size or we'll end up
5511          *      splitting entries in strange ways.
5512          */
5513
5514         if (!page_aligned(copy->size) ||
5515             !page_aligned (copy->offset) ||
5516             !page_aligned (dst_addr))
5517         {
5518                 aligned = FALSE;
5519                 dst_end = vm_map_round_page(dst_addr + copy->size);
5520         } else {
5521                 dst_end = dst_addr + copy->size;
5522         }
5523
5524         vm_map_lock(dst_map);
5525
5526         /* LP64todo - remove this check when vm_map_commpage64()
5527          * no longer has to stuff in a map_entry for the commpage
5528          * above the map's max_offset.
5529          */
5530         if (dst_addr >= dst_map->max_offset) {
5531                 vm_map_unlock(dst_map);
5532                 return(KERN_INVALID_ADDRESS);
5533         }
5534
5535 start_pass_1:
5536         if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5537                 vm_map_unlock(dst_map);
5538                 return(KERN_INVALID_ADDRESS);
5539         }
5540         vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
5541         for (entry = tmp_entry;;) {
5542                 vm_map_entry_t  next = entry->vme_next;
5543
5544                 while(entry->is_sub_map) {
5545                         vm_map_offset_t sub_start;
5546                         vm_map_offset_t sub_end;
5547                         vm_map_offset_t local_end;
5548
5549                         if (entry->in_transition) {
5550
5551                                 /*
5552                                  * Say that we are waiting, and wait for entry.
5553                                  */
5554                                 entry->needs_wakeup = TRUE;
5555                                 vm_map_entry_wait(dst_map, THREAD_UNINT);
5556
5557                                 goto start_pass_1;
5558                         }
5559
5560                         local_end = entry->vme_end;
5561                         if (!(entry->needs_copy)) {
5562                                 /* if needs_copy we are a COW submap */
5563                                 /* in such a case we just replace so */
5564                                 /* there is no need for the follow-  */
5565                                 /* ing check.                        */
5566                                 encountered_sub_map = TRUE;
5567                                 sub_start = entry->offset;
5568
5569                                 if(entry->vme_end < dst_end)
5570                                         sub_end = entry->vme_end;
5571                                 else
5572                                         sub_end = dst_end;
5573                                 sub_end -= entry->vme_start;
5574                                 sub_end += entry->offset;
5575                                 vm_map_unlock(dst_map);
5576
5577                                 kr = vm_map_overwrite_submap_recurse(
5578                                         entry->object.sub_map,
5579                                         sub_start,
5580                                         sub_end - sub_start);
5581                                 if(kr != KERN_SUCCESS)
5582                                         return kr;
5583                                 vm_map_lock(dst_map);
5584                         }
5585
5586                         if (dst_end <= entry->vme_end)
5587                                 goto start_overwrite;
5588                         if(!vm_map_lookup_entry(dst_map, local_end,
5589                                                 &entry)) {
5590                                 vm_map_unlock(dst_map);
5591                                 return(KERN_INVALID_ADDRESS);
5592                         }
5593                         next = entry->vme_next;
5594                 }
5595
5596                 if ( ! (entry->protection & VM_PROT_WRITE)) {
5597                         vm_map_unlock(dst_map);
5598                         return(KERN_PROTECTION_FAILURE);
5599                 }
5600
5601                 /*
5602                  *      If the entry is in transition, we must wait
5603                  *      for it to exit that state.  Anything could happen
5604                  *      when we unlock the map, so start over.
5605                  */
5606                 if (entry->in_transition) {
5607
5608                         /*
5609                          * Say that we are waiting, and wait for entry.
5610                          */
5611                         entry->needs_wakeup = TRUE;
5612                         vm_map_entry_wait(dst_map, THREAD_UNINT);
5613
5614                         goto start_pass_1;
5615                 }
5616
5617 /*
5618  *              our range is contained completely within this map entry
5619  */
5620                 if (dst_end <= entry->vme_end)
5621                         break;
5622 /*
5623  *              check that range specified is contiguous region
5624  */
5625                 if ((next == vm_map_to_entry(dst_map)) ||
5626                     (next->vme_start != entry->vme_end)) {
5627                         vm_map_unlock(dst_map);
5628                         return(KERN_INVALID_ADDRESS);
5629                 }
5630
5631
5632                 /*
5633                  *      Check for permanent objects in the destination.
5634                  */
5635                 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5636                     ((!entry->object.vm_object->internal) ||
5637                      (entry->object.vm_object->true_share))) {
5638                         contains_permanent_objects = TRUE;
5639                 }
5640
5641                 entry = next;
5642         }/* for */
5643
5644 start_overwrite:
5645         /*
5646          *      If there are permanent objects in the destination, then
5647          *      the copy cannot be interrupted.
5648          */
5649
5650         if (interruptible && contains_permanent_objects) {
5651                 vm_map_unlock(dst_map);
5652                 return(KERN_FAILURE);   /* XXX */
5653         }
5654
5655         /*
5656          *
5657          *      Make a second pass, overwriting the data
5658          *      At the beginning of each loop iteration,
5659          *      the next entry to be overwritten is "tmp_entry"
5660          *      (initially, the value returned from the lookup above),
5661          *      and the starting address expected in that entry
5662          *      is "start".
5663          */
5664
5665         total_size = copy->size;
5666         if(encountered_sub_map) {
5667                 copy_size = 0;
5668                 /* re-calculate tmp_entry since we've had the map */
5669                 /* unlocked */
5670                 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
5671                         vm_map_unlock(dst_map);
5672                         return(KERN_INVALID_ADDRESS);
5673                 }
5674         } else {
5675                 copy_size = copy->size;
5676         }
5677
5678         base_addr = dst_addr;
5679         while(TRUE) {
5680                 /* deconstruct the copy object and do in parts */
5681                 /* only in sub_map, interruptable case */
5682                 vm_map_entry_t  copy_entry;
5683                 vm_map_entry_t  previous_prev = VM_MAP_ENTRY_NULL;
5684                 vm_map_entry_t  next_copy = VM_MAP_ENTRY_NULL;
5685                 int             nentries;
5686                 int             remaining_entries = 0;
5687                 vm_map_offset_t new_offset = 0;
5688
5689                 for (entry = tmp_entry; copy_size == 0;) {
5690                         vm_map_entry_t  next;
5691
5692                         next = entry->vme_next;
5693
5694                         /* tmp_entry and base address are moved along */
5695                         /* each time we encounter a sub-map.  Otherwise */
5696                         /* entry can outpase tmp_entry, and the copy_size */
5697                         /* may reflect the distance between them */
5698                         /* if the current entry is found to be in transition */
5699                         /* we will start over at the beginning or the last */
5700                         /* encounter of a submap as dictated by base_addr */
5701                         /* we will zero copy_size accordingly. */
5702                         if (entry->in_transition) {
5703                                 /*
5704                                  * Say that we are waiting, and wait for entry.
5705                                  */
5706                                 entry->needs_wakeup = TRUE;
5707                                 vm_map_entry_wait(dst_map, THREAD_UNINT);
5708
5709                                 if(!vm_map_lookup_entry(dst_map, base_addr,
5710                                                         &tmp_entry)) {
5711                                         vm_map_unlock(dst_map);
5712                                         return(KERN_INVALID_ADDRESS);
5713                                 }
5714                                 copy_size = 0;
5715                                 entry = tmp_entry;
5716                                 continue;
5717                         }
5718                         if(entry->is_sub_map) {
5719                                 vm_map_offset_t sub_start;
5720                                 vm_map_offset_t sub_end;
5721                                 vm_map_offset_t local_end;
5722
5723                                 if (entry->needs_copy) {
5724                                         /* if this is a COW submap */
5725                                         /* just back the range with a */
5726                                         /* anonymous entry */
5727                                         if(entry->vme_end < dst_end)
5728                                                 sub_end = entry->vme_end;
5729                                         else
5730                                                 sub_end = dst_end;
5731                                         if(entry->vme_start < base_addr)
5732                                                 sub_start = base_addr;
5733                                         else
5734                                                 sub_start = entry->vme_start;
5735                                         vm_map_clip_end(
5736                                                 dst_map, entry, sub_end);
5737                                         vm_map_clip_start(
5738                                                 dst_map, entry, sub_start);
5739                                         assert(!entry->use_pmap);
5740                                         entry->is_sub_map = FALSE;
5741                                         vm_map_deallocate(
5742                                                 entry->object.sub_map);
5743                                         entry->object.sub_map = NULL;
5744                                         entry->is_shared = FALSE;
5745                                         entry->needs_copy = FALSE;
5746                                         entry->offset = 0;
5747                                         /*
5748                                          * XXX FBDP
5749                                          * We should propagate the protections
5750                                          * of the submap entry here instead
5751                                          * of forcing them to VM_PROT_ALL...
5752                                          * Or better yet, we should inherit
5753                                          * the protection of the copy_entry.
5754                                          */
5755                                         entry->protection = VM_PROT_ALL;
5756                                         entry->max_protection = VM_PROT_ALL;
5757                                         entry->wired_count = 0;
5758                                         entry->user_wired_count = 0;
5759                                         if(entry->inheritance
5760                                            == VM_INHERIT_SHARE)
5761                                                 entry->inheritance = VM_INHERIT_COPY;
5762                                         continue;
5763                                 }
5764                                 /* first take care of any non-sub_map */
5765                                 /* entries to send */
5766                                 if(base_addr < entry->vme_start) {
5767                                         /* stuff to send */
5768                                         copy_size =
5769                                                 entry->vme_start - base_addr;
5770                                         break;
5771                                 }
5772                                 sub_start = entry->offset;
5773
5774                                 if(entry->vme_end < dst_end)
5775                                         sub_end = entry->vme_end;
5776                                 else
5777                                         sub_end = dst_end;
5778                                 sub_end -= entry->vme_start;
5779                                 sub_end += entry->offset;
5780                                 local_end = entry->vme_end;
5781                                 vm_map_unlock(dst_map);
5782                                 copy_size = sub_end - sub_start;
5783
5784                                 /* adjust the copy object */
5785                                 if (total_size > copy_size) {
5786                                         vm_map_size_t   local_size = 0;
5787                                         vm_map_size_t   entry_size;
5788
5789                                         nentries = 1;
5790                                         new_offset = copy->offset;
5791                                         copy_entry = vm_map_copy_first_entry(copy);
5792                                         while(copy_entry !=
5793                                               vm_map_copy_to_entry(copy)){
5794                                                 entry_size = copy_entry->vme_end -
5795                                                         copy_entry->vme_start;
5796                                                 if((local_size < copy_size) &&
5797                                                    ((local_size + entry_size)
5798                                                     >= copy_size)) {
5799                                                         vm_map_copy_clip_end(copy,
5800                                                                              copy_entry,
5801                                                                              copy_entry->vme_start +
5802                                                                              (copy_size - local_size));
5803                                                         entry_size = copy_entry->vme_end -
5804                                                                 copy_entry->vme_start;
5805                                                         local_size += entry_size;
5806                                                         new_offset += entry_size;
5807                                                 }
5808                                                 if(local_size >= copy_size) {
5809                                                         next_copy = copy_entry->vme_next;
5810                                                         copy_entry->vme_next =
5811                                                                 vm_map_copy_to_entry(copy);
5812                                                         previous_prev =
5813                                                                 copy->cpy_hdr.links.prev;
5814                                                         copy->cpy_hdr.links.prev = copy_entry;
5815                                                         copy->size = copy_size;
5816                                                         remaining_entries =
5817                                                                 copy->cpy_hdr.nentries;
5818                                                         remaining_entries -= nentries;
5819                                                         copy->cpy_hdr.nentries = nentries;
5820                                                         break;
5821                                                 } else {
5822                                                         local_size += entry_size;
5823                                                         new_offset += entry_size;
5824                                                         nentries++;
5825                                                 }
5826                                                 copy_entry = copy_entry->vme_next;
5827                                         }
5828                                 }
5829
5830                                 if((entry->use_pmap) && (pmap == NULL)) {
5831                                         kr = vm_map_copy_overwrite_nested(
5832                                                 entry->object.sub_map,
5833                                                 sub_start,
5834                                                 copy,
5835                                                 interruptible,
5836                                                 entry->object.sub_map->pmap,
5837                                                 TRUE);
5838                                 } else if (pmap != NULL) {
5839                                         kr = vm_map_copy_overwrite_nested(
5840                                                 entry->object.sub_map,
5841                                                 sub_start,
5842                                                 copy,
5843                                                 interruptible, pmap,
5844                                                 TRUE);
5845                                 } else {
5846                                         kr = vm_map_copy_overwrite_nested(
5847                                                 entry->object.sub_map,
5848                                                 sub_start,
5849                                                 copy,
5850                                                 interruptible,
5851                                                 dst_map->pmap,
5852                                                 TRUE);
5853                                 }
5854                                 if(kr != KERN_SUCCESS) {
5855                                         if(next_copy != NULL) {
5856                                                 copy->cpy_hdr.nentries +=
5857                                                         remaining_entries;
5858                                                 copy->cpy_hdr.links.prev->vme_next =
5859                                                         next_copy;
5860                                                 copy->cpy_hdr.links.prev
5861                                                         = previous_prev;
5862                                                 copy->size = total_size;
5863                                         }
5864                                         return kr;
5865                                 }
5866                                 if (dst_end <= local_end) {
5867                                         return(KERN_SUCCESS);
5868                                 }
5869                                 /* otherwise copy no longer exists, it was */
5870                                 /* destroyed after successful copy_overwrite */
5871                                 copy = (vm_map_copy_t)
5872                                         zalloc(vm_map_copy_zone);
5873                                 vm_map_copy_first_entry(copy) =
5874                                         vm_map_copy_last_entry(copy) =
5875                                         vm_map_copy_to_entry(copy);
5876                                 copy->type = VM_MAP_COPY_ENTRY_LIST;
5877                                 copy->offset = new_offset;
5878
5879                                 total_size -= copy_size;
5880                                 copy_size = 0;
5881                                 /* put back remainder of copy in container */
5882                                 if(next_copy != NULL) {
5883                                         copy->cpy_hdr.nentries = remaining_entries;
5884                                         copy->cpy_hdr.links.next = next_copy;
5885                                         copy->cpy_hdr.links.prev = previous_prev;
5886                                         copy->size = total_size;
5887                                         next_copy->vme_prev =
5888                                                 vm_map_copy_to_entry(copy);
5889                                         next_copy = NULL;
5890                                 }
5891                                 base_addr = local_end;
5892                                 vm_map_lock(dst_map);
5893                                 if(!vm_map_lookup_entry(dst_map,
5894                                                         local_end, &tmp_entry)) {
5895                                         vm_map_unlock(dst_map);
5896                                         return(KERN_INVALID_ADDRESS);
5897                                 }
5898                                 entry = tmp_entry;
5899                                 continue;
5900                         }
5901                         if (dst_end <= entry->vme_end) {
5902                                 copy_size = dst_end - base_addr;
5903                                 break;
5904                         }
5905
5906                         if ((next == vm_map_to_entry(dst_map)) ||
5907                             (next->vme_start != entry->vme_end)) {
5908                                 vm_map_unlock(dst_map);
5909                                 return(KERN_INVALID_ADDRESS);
5910                         }
5911
5912                         entry = next;
5913                 }/* for */
5914
5915                 next_copy = NULL;
5916                 nentries = 1;
5917
5918                 /* adjust the copy object */
5919                 if (total_size > copy_size) {
5920                         vm_map_size_t   local_size = 0;
5921                         vm_map_size_t   entry_size;
5922
5923                         new_offset = copy->offset;
5924                         copy_entry = vm_map_copy_first_entry(copy);
5925                         while(copy_entry != vm_map_copy_to_entry(copy)) {
5926                                 entry_size = copy_entry->vme_end -
5927                                         copy_entry->vme_start;
5928                                 if((local_size < copy_size) &&
5929                                    ((local_size + entry_size)
5930                                     >= copy_size)) {
5931                                         vm_map_copy_clip_end(copy, copy_entry,
5932                                                              copy_entry->vme_start +
5933                                                              (copy_size - local_size));
5934                                         entry_size = copy_entry->vme_end -
5935                                                 copy_entry->vme_start;
5936                                         local_size += entry_size;
5937                                         new_offset += entry_size;
5938                                 }
5939                                 if(local_size >= copy_size) {
5940                                         next_copy = copy_entry->vme_next;
5941                                         copy_entry->vme_next =
5942                                                 vm_map_copy_to_entry(copy);
5943                                         previous_prev =
5944                                                 copy->cpy_hdr.links.prev;
5945                                         copy->cpy_hdr.links.prev = copy_entry;
5946                                         copy->size = copy_size;
5947                                         remaining_entries =
5948                                                 copy->cpy_hdr.nentries;
5949                                         remaining_entries -= nentries;
5950                                         copy->cpy_hdr.nentries = nentries;
5951                                         break;
5952                                 } else {
5953                                         local_size += entry_size;
5954                                         new_offset += entry_size;
5955                                         nentries++;
5956                                 }
5957                                 copy_entry = copy_entry->vme_next;
5958                         }
5959                 }
5960
5961                 if (aligned) {
5962                         pmap_t  local_pmap;
5963
5964                         if(pmap)
5965                                 local_pmap = pmap;
5966                         else
5967                                 local_pmap = dst_map->pmap;
5968
5969                         if ((kr =  vm_map_copy_overwrite_aligned(
5970                                      dst_map, tmp_entry, copy,
5971                                      base_addr, local_pmap)) != KERN_SUCCESS) {
5972                                 if(next_copy != NULL) {
5973                                         copy->cpy_hdr.nentries +=
5974                                                 remaining_entries;
5975                                         copy->cpy_hdr.links.prev->vme_next =
5976                                                 next_copy;
5977                                         copy->cpy_hdr.links.prev =
5978                                                 previous_prev;
5979                                         copy->size += copy_size;
5980                                 }
5981                                 return kr;
5982                         }
5983                         vm_map_unlock(dst_map);
5984                 } else {
5985                         /*
5986                          * Performance gain:
5987                          *
5988                          * if the copy and dst address are misaligned but the same
5989                          * offset within the page we can copy_not_aligned the
5990                          * misaligned parts and copy aligned the rest.  If they are
5991                          * aligned but len is unaligned we simply need to copy
5992                          * the end bit unaligned.  We'll need to split the misaligned
5993                          * bits of the region in this case !
5994                          */
5995                         /* ALWAYS UNLOCKS THE dst_map MAP */
5996                         if ((kr =  vm_map_copy_overwrite_unaligned( dst_map,
5997                                                                     tmp_entry, copy, base_addr)) != KERN_SUCCESS) {
5998                                 if(next_copy != NULL) {
5999                                         copy->cpy_hdr.nentries +=
6000                                                 remaining_entries;
6001                                         copy->cpy_hdr.links.prev->vme_next =
6002                                                 next_copy;
6003                                         copy->cpy_hdr.links.prev =
6004                                                 previous_prev;
6005                                         copy->size += copy_size;
6006                                 }
6007                                 return kr;
6008                         }
6009                 }
6010                 total_size -= copy_size;
6011                 if(total_size == 0)
6012                         break;
6013                 base_addr += copy_size;
6014                 copy_size = 0;
6015                 copy->offset = new_offset;
6016                 if(next_copy != NULL) {
6017                         copy->cpy_hdr.nentries = remaining_entries;
6018                         copy->cpy_hdr.links.next = next_copy;
6019                         copy->cpy_hdr.links.prev = previous_prev;
6020                         next_copy->vme_prev = vm_map_copy_to_entry(copy);
6021                         copy->size = total_size;
6022                 }
6023                 vm_map_lock(dst_map);
6024                 while(TRUE) {
6025                         if (!vm_map_lookup_entry(dst_map,
6026                                                  base_addr, &tmp_entry)) {
6027                                 vm_map_unlock(dst_map);
6028                                 return(KERN_INVALID_ADDRESS);
6029                         }
6030                         if (tmp_entry->in_transition) {
6031                                 entry->needs_wakeup = TRUE;
6032                                 vm_map_entry_wait(dst_map, THREAD_UNINT);
6033                         } else {
6034                                 break;
6035                         }
6036                 }
6037                 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(base_addr));
6038
6039                 entry = tmp_entry;
6040         } /* while */
6041
6042         /*
6043          *      Throw away the vm_map_copy object
6044          */
6045         if (discard_on_success)
6046                 vm_map_copy_discard(copy);
6047
6048         return(KERN_SUCCESS);
6049 }/* vm_map_copy_overwrite */
6050
6051 kern_return_t
6052 vm_map_copy_overwrite(
6053         vm_map_t        dst_map,
6054         vm_map_offset_t dst_addr,
6055         vm_map_copy_t   copy,
6056         boolean_t       interruptible)
6057 {
6058         vm_map_size_t   head_size, tail_size;
6059         vm_map_copy_t   head_copy, tail_copy;
6060         vm_map_offset_t head_addr, tail_addr;
6061         vm_map_entry_t  entry;
6062         kern_return_t   kr;
6063
6064         head_size = 0;
6065         tail_size = 0;
6066         head_copy = NULL;
6067         tail_copy = NULL;
6068         head_addr = 0;
6069         tail_addr = 0;
6070
6071         if (interruptible ||
6072             copy == VM_MAP_COPY_NULL ||
6073             copy->type != VM_MAP_COPY_ENTRY_LIST) {
6074                 /*
6075                  * We can't split the "copy" map if we're interruptible
6076                  * or if we don't have a "copy" map...
6077                  */
6078         blunt_copy:
6079                 return vm_map_copy_overwrite_nested(dst_map,
6080                                                     dst_addr,
6081                                                     copy,
6082                                                     interruptible,
6083                                                     (pmap_t) NULL,
6084                                                     TRUE);
6085         }
6086
6087         if (copy->size < 3 * PAGE_SIZE) {
6088                 /*
6089                  * Too small to bother with optimizing...
6090                  */
6091                 goto blunt_copy;
6092         }
6093
6094         if ((dst_addr & PAGE_MASK) != (copy->offset & PAGE_MASK)) {
6095                 /*
6096                  * Incompatible mis-alignment of source and destination...
6097                  */
6098                 goto blunt_copy;
6099         }
6100
6101         /*
6102          * Proper alignment or identical mis-alignment at the beginning.
6103          * Let's try and do a small unaligned copy first (if needed)
6104          * and then an aligned copy for the rest.
6105          */
6106         if (!page_aligned(dst_addr)) {
6107                 head_addr = dst_addr;
6108                 head_size = PAGE_SIZE - (copy->offset & PAGE_MASK);
6109         }
6110         if (!page_aligned(copy->offset + copy->size)) {
6111                 /*
6112                  * Mis-alignment at the end.
6113                  * Do an aligned copy up to the last page and
6114                  * then an unaligned copy for the remaining bytes.
6115                  */
6116                 tail_size = (copy->offset + copy->size) & PAGE_MASK;
6117                 tail_addr = dst_addr + copy->size - tail_size;
6118         }
6119
6120         if (head_size + tail_size == copy->size) {
6121                 /*
6122                  * It's all unaligned, no optimization possible...
6123                  */
6124                 goto blunt_copy;
6125         }
6126
6127         /*
6128          * Can't optimize if there are any submaps in the
6129          * destination due to the way we free the "copy" map
6130          * progressively in vm_map_copy_overwrite_nested()
6131          * in that case.
6132          */
6133         vm_map_lock_read(dst_map);
6134         if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
6135                 vm_map_unlock_read(dst_map);
6136                 goto blunt_copy;
6137         }
6138         for (;
6139              (entry != vm_map_copy_to_entry(copy) &&
6140               entry->vme_start < dst_addr + copy->size);
6141              entry = entry->vme_next) {
6142                 if (entry->is_sub_map) {
6143                         vm_map_unlock_read(dst_map);
6144                         goto blunt_copy;
6145                 }
6146         }
6147         vm_map_unlock_read(dst_map);
6148
6149         if (head_size) {
6150                 /*
6151                  * Unaligned copy of the first "head_size" bytes, to reach
6152                  * a page boundary.
6153                  */
6154
6155                 /*
6156                  * Extract "head_copy" out of "copy".
6157                  */
6158                 head_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6159                 vm_map_copy_first_entry(head_copy) =
6160                         vm_map_copy_to_entry(head_copy);
6161                 vm_map_copy_last_entry(head_copy) =
6162                         vm_map_copy_to_entry(head_copy);
6163                 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
6164                 head_copy->cpy_hdr.nentries = 0;
6165                 head_copy->cpy_hdr.entries_pageable =
6166                         copy->cpy_hdr.entries_pageable;
6167                 vm_map_store_init(&head_copy->cpy_hdr);
6168
6169                 head_copy->offset = copy->offset;
6170                 head_copy->size = head_size;
6171
6172                 copy->offset += head_size;
6173                 copy->size -= head_size;
6174
6175                 entry = vm_map_copy_first_entry(copy);
6176                 vm_map_copy_clip_end(copy, entry, copy->offset);
6177                 vm_map_copy_entry_unlink(copy, entry);
6178                 vm_map_copy_entry_link(head_copy,
6179                                        vm_map_copy_to_entry(head_copy),
6180                                        entry);
6181
6182                 /*
6183                  * Do the unaligned copy.
6184                  */
6185                 kr = vm_map_copy_overwrite_nested(dst_map,
6186                                                   head_addr,
6187                                                   head_copy,
6188                                                   interruptible,
6189                                                   (pmap_t) NULL,
6190                                                   FALSE);
6191                 if (kr != KERN_SUCCESS)
6192                         goto done;
6193         }
6194
6195         if (tail_size) {
6196                 /*
6197                  * Extract "tail_copy" out of "copy".
6198                  */
6199                 tail_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6200                 vm_map_copy_first_entry(tail_copy) =
6201                         vm_map_copy_to_entry(tail_copy);
6202                 vm_map_copy_last_entry(tail_copy) =
6203                         vm_map_copy_to_entry(tail_copy);
6204                 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
6205                 tail_copy->cpy_hdr.nentries = 0;
6206                 tail_copy->cpy_hdr.entries_pageable =
6207                         copy->cpy_hdr.entries_pageable;
6208                 vm_map_store_init(&tail_copy->cpy_hdr);
6209
6210                 tail_copy->offset = copy->offset + copy->size - tail_size;
6211                 tail_copy->size = tail_size;
6212
6213                 copy->size -= tail_size;
6214
6215                 entry = vm_map_copy_last_entry(copy);
6216                 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
6217                 entry = vm_map_copy_last_entry(copy);
6218                 vm_map_copy_entry_unlink(copy, entry);
6219                 vm_map_copy_entry_link(tail_copy,
6220                                        vm_map_copy_last_entry(tail_copy),
6221                                        entry);
6222         }
6223
6224         /*
6225          * Copy most (or possibly all) of the data.
6226          */
6227         kr = vm_map_copy_overwrite_nested(dst_map,
6228                                           dst_addr + head_size,
6229                                           copy,
6230                                           interruptible,
6231                                           (pmap_t) NULL,
6232                                           FALSE);
6233         if (kr != KERN_SUCCESS) {
6234                 goto done;
6235         }
6236
6237         if (tail_size) {
6238                 kr = vm_map_copy_overwrite_nested(dst_map,
6239                                                   tail_addr,
6240                                                   tail_copy,
6241                                                   interruptible,
6242                                                   (pmap_t) NULL,
6243                                                   FALSE);
6244         }
6245
6246 done:
6247         assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
6248         if (kr == KERN_SUCCESS) {
6249                 /*
6250                  * Discard all the copy maps.
6251                  */
6252                 if (head_copy) {
6253                         vm_map_copy_discard(head_copy);
6254                         head_copy = NULL;
6255                 }
6256                 vm_map_copy_discard(copy);
6257                 if (tail_copy) {
6258                         vm_map_copy_discard(tail_copy);
6259                         tail_copy = NULL;
6260                 }
6261         } else {
6262                 /*
6263                  * Re-assemble the original copy map.
6264                  */
6265                 if (head_copy) {
6266                         entry = vm_map_copy_first_entry(head_copy);
6267                         vm_map_copy_entry_unlink(head_copy, entry);
6268                         vm_map_copy_entry_link(copy,
6269                                                vm_map_copy_to_entry(copy),
6270                                                entry);
6271                         copy->offset -= head_size;
6272                         copy->size += head_size;
6273                         vm_map_copy_discard(head_copy);
6274                         head_copy = NULL;
6275                 }
6276                 if (tail_copy) {
6277                         entry = vm_map_copy_last_entry(tail_copy);
6278                         vm_map_copy_entry_unlink(tail_copy, entry);
6279                         vm_map_copy_entry_link(copy,
6280                                                vm_map_copy_last_entry(copy),
6281                                                entry);
6282                         copy->size += tail_size;
6283                         vm_map_copy_discard(tail_copy);
6284                         tail_copy = NULL;
6285                 }
6286         }
6287         return kr;
6288 }
6289
6290
6291 /*
6292  *      Routine: vm_map_copy_overwrite_unaligned        [internal use only]
6293  *
6294  *      Description:
6295  *      Physically copy unaligned data
6296  *
6297  *      Implementation:
6298  *      Unaligned parts of pages have to be physically copied.  We use
6299  *      a modified form of vm_fault_copy (which understands non-aligned
6300  *      page offsets and sizes) to do the copy.  We attempt to copy as
6301  *      much memory in one go as possible; however, vm_fault_copy copies
6302  *      within 1 memory object so we have to find the smaller of "amount left",
6303  *      "source object data size" and "target object data size".  With
6304  *      unaligned data we don't need to split regions, therefore the source
6305  *      (copy) object should be one map entry; the target range may be split
6306  *      over multiple map entries, however.  In any event we are pessimistic
6307  *      about these assumptions.
6308  *
6309  *      Assumptions:
6310  *      dst_map is locked on entry and is returned locked on success,
6311  *      unlocked on error.
6312  */
6313
6314 static kern_return_t
6315 vm_map_copy_overwrite_unaligned(
6316         vm_map_t        dst_map,
6317         vm_map_entry_t  entry,
6318         vm_map_copy_t   copy,
6319         vm_map_offset_t start)
6320 {
6321         vm_map_entry_t          copy_entry = vm_map_copy_first_entry(copy);
6322         vm_map_version_t        version;
6323         vm_object_t             dst_object;
6324         vm_object_offset_t      dst_offset;
6325         vm_object_offset_t      src_offset;
6326         vm_object_offset_t      entry_offset;
6327         vm_map_offset_t         entry_end;
6328         vm_map_size_t           src_size,
6329                                 dst_size,
6330                                 copy_size,
6331                                 amount_left;
6332         kern_return_t           kr = KERN_SUCCESS;
6333
6334         vm_map_lock_write_to_read(dst_map);
6335
6336         src_offset = copy->offset - vm_object_trunc_page(copy->offset);
6337         amount_left = copy->size;
6338 /*
6339  *      unaligned so we never clipped this entry, we need the offset into
6340  *      the vm_object not just the data.
6341  */
6342         while (amount_left > 0) {
6343
6344                 if (entry == vm_map_to_entry(dst_map)) {
6345                         vm_map_unlock_read(dst_map);
6346                         return KERN_INVALID_ADDRESS;
6347                 }
6348
6349                 /* "start" must be within the current map entry */
6350                 assert ((start>=entry->vme_start) && (start<entry->vme_end));
6351
6352                 dst_offset = start - entry->vme_start;
6353
6354                 dst_size = entry->vme_end - start;
6355
6356                 src_size = copy_entry->vme_end -
6357                         (copy_entry->vme_start + src_offset);
6358
6359                 if (dst_size < src_size) {
6360 /*
6361  *                      we can only copy dst_size bytes before
6362  *                      we have to get the next destination entry
6363  */
6364                         copy_size = dst_size;
6365                 } else {
6366 /*
6367  *                      we can only copy src_size bytes before
6368  *                      we have to get the next source copy entry
6369  */
6370                         copy_size = src_size;
6371                 }
6372
6373                 if (copy_size > amount_left) {
6374                         copy_size = amount_left;
6375                 }
6376 /*
6377  *              Entry needs copy, create a shadow shadow object for
6378  *              Copy on write region.
6379  */
6380                 if (entry->needs_copy &&
6381                     ((entry->protection & VM_PROT_WRITE) != 0))
6382                 {
6383                         if (vm_map_lock_read_to_write(dst_map)) {
6384                                 vm_map_lock_read(dst_map);
6385                                 goto RetryLookup;
6386                         }
6387                         vm_object_shadow(&entry->object.vm_object,
6388                                          &entry->offset,
6389                                          (vm_map_size_t)(entry->vme_end
6390                                                          - entry->vme_start));
6391                         entry->needs_copy = FALSE;
6392                         vm_map_lock_write_to_read(dst_map);
6393                 }
6394                 dst_object = entry->object.vm_object;
6395 /*
6396  *              unlike with the virtual (aligned) copy we're going
6397  *              to fault on it therefore we need a target object.
6398  */
6399                 if (dst_object == VM_OBJECT_NULL) {
6400                         if (vm_map_lock_read_to_write(dst_map)) {
6401                                 vm_map_lock_read(dst_map);
6402                                 goto RetryLookup;
6403                         }
6404                         dst_object = vm_object_allocate((vm_map_size_t)
6405                                                         entry->vme_end - entry->vme_start);
6406                         entry->object.vm_object = dst_object;
6407                         entry->offset = 0;
6408                         vm_map_lock_write_to_read(dst_map);
6409                 }
6410 /*
6411  *              Take an object reference and unlock map. The "entry" may
6412  *              disappear or change when the map is unlocked.
6413  */
6414                 vm_object_reference(dst_object);
6415                 version.main_timestamp = dst_map->timestamp;
6416                 entry_offset = entry->offset;
6417                 entry_end = entry->vme_end;
6418                 vm_map_unlock_read(dst_map);
6419 /*
6420  *              Copy as much as possible in one pass
6421  */
6422                 kr = vm_fault_copy(
6423                         copy_entry->object.vm_object,
6424                         copy_entry->offset + src_offset,
6425                         &copy_size,
6426                         dst_object,
6427                         entry_offset + dst_offset,
6428                         dst_map,
6429                         &version,
6430                         THREAD_UNINT );
6431
6432                 start += copy_size;
6433                 src_offset += copy_size;
6434                 amount_left -= copy_size;
6435 /*
6436  *              Release the object reference
6437  */
6438                 vm_object_deallocate(dst_object);
6439 /*
6440  *              If a hard error occurred, return it now
6441  */
6442                 if (kr != KERN_SUCCESS)
6443                         return kr;
6444
6445                 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
6446                     || amount_left == 0)
6447                 {
6448 /*
6449  *                      all done with this copy entry, dispose.
6450  */
6451                         vm_map_copy_entry_unlink(copy, copy_entry);
6452                         vm_object_deallocate(copy_entry->object.vm_object);
6453                         vm_map_copy_entry_dispose(copy, copy_entry);
6454
6455                         if ((copy_entry = vm_map_copy_first_entry(copy))
6456                             == vm_map_copy_to_entry(copy) && amount_left) {
6457 /*
6458  *                              not finished copying but run out of source
6459  */
6460                                 return KERN_INVALID_ADDRESS;
6461                         }
6462                         src_offset = 0;
6463                 }
6464
6465                 if (amount_left == 0)
6466                         return KERN_SUCCESS;
6467
6468                 vm_map_lock_read(dst_map);
6469                 if (version.main_timestamp == dst_map->timestamp) {
6470                         if (start == entry_end) {
6471 /*
6472  *                              destination region is split.  Use the version
6473  *                              information to avoid a lookup in the normal
6474  *                              case.
6475  */
6476                                 entry = entry->vme_next;
6477 /*
6478  *                              should be contiguous. Fail if we encounter
6479  *                              a hole in the destination.
6480  */
6481                                 if (start != entry->vme_start) {
6482                                         vm_map_unlock_read(dst_map);
6483                                         return KERN_INVALID_ADDRESS ;
6484                                 }
6485                         }
6486                 } else {
6487 /*
6488  *                      Map version check failed.
6489  *                      we must lookup the entry because somebody
6490  *                      might have changed the map behind our backs.
6491  */
6492                 RetryLookup:
6493                         if (!vm_map_lookup_entry(dst_map, start, &entry))
6494                         {
6495                                 vm_map_unlock_read(dst_map);
6496                                 return KERN_INVALID_ADDRESS ;
6497                         }
6498                 }
6499         }/* while */
6500
6501         return KERN_SUCCESS;
6502 }/* vm_map_copy_overwrite_unaligned */
6503
6504 /*
6505  *      Routine: vm_map_copy_overwrite_aligned  [internal use only]
6506  *
6507  *      Description:
6508  *      Does all the vm_trickery possible for whole pages.
6509  *
6510  *      Implementation:
6511  *
6512  *      If there are no permanent objects in the destination,
6513  *      and the source and destination map entry zones match,
6514  *      and the destination map entry is not shared,
6515  *      then the map entries can be deleted and replaced
6516  *      with those from the copy.  The following code is the
6517  *      basic idea of what to do, but there are lots of annoying
6518  *      little details about getting protection and inheritance
6519  *      right.  Should add protection, inheritance, and sharing checks
6520  *      to the above pass and make sure that no wiring is involved.
6521  */
6522
6523 static kern_return_t
6524 vm_map_copy_overwrite_aligned(
6525         vm_map_t        dst_map,
6526         vm_map_entry_t  tmp_entry,
6527         vm_map_copy_t   copy,
6528         vm_map_offset_t start,
6529         __unused pmap_t pmap)
6530 {
6531         vm_object_t     object;
6532         vm_map_entry_t  copy_entry;
6533         vm_map_size_t   copy_size;
6534         vm_map_size_t   size;
6535         vm_map_entry_t  entry;
6536
6537         while ((copy_entry = vm_map_copy_first_entry(copy))
6538                != vm_map_copy_to_entry(copy))
6539         {
6540                 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
6541
6542                 entry = tmp_entry;
6543                 assert(!entry->use_pmap); /* unnested when clipped earlier */
6544                 if (entry == vm_map_to_entry(dst_map)) {
6545                         vm_map_unlock(dst_map);
6546                         return KERN_INVALID_ADDRESS;
6547                 }
6548                 size = (entry->vme_end - entry->vme_start);
6549                 /*
6550                  *      Make sure that no holes popped up in the
6551                  *      address map, and that the protection is
6552                  *      still valid, in case the map was unlocked
6553                  *      earlier.
6554                  */
6555
6556                 if ((entry->vme_start != start) || ((entry->is_sub_map)
6557                                                     && !entry->needs_copy)) {
6558                         vm_map_unlock(dst_map);
6559                         return(KERN_INVALID_ADDRESS);
6560                 }
6561                 assert(entry != vm_map_to_entry(dst_map));
6562
6563                 /*
6564                  *      Check protection again
6565                  */
6566
6567                 if ( ! (entry->protection & VM_PROT_WRITE)) {
6568                         vm_map_unlock(dst_map);
6569                         return(KERN_PROTECTION_FAILURE);
6570                 }
6571
6572                 /*
6573                  *      Adjust to source size first
6574                  */
6575
6576                 if (copy_size < size) {
6577                         vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
6578                         size = copy_size;
6579                 }
6580
6581                 /*
6582                  *      Adjust to destination size
6583                  */
6584
6585                 if (size < copy_size) {
6586                         vm_map_copy_clip_end(copy, copy_entry,
6587                                              copy_entry->vme_start + size);
6588                         copy_size = size;
6589                 }
6590
6591                 assert((entry->vme_end - entry->vme_start) == size);
6592                 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
6593                 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
6594
6595                 /*
6596                  *      If the destination contains temporary unshared memory,
6597                  *      we can perform the copy by throwing it away and
6598                  *      installing the source data.
6599                  */
6600
6601                 object = entry->object.vm_object;
6602                 if ((!entry->is_shared &&
6603                      ((object == VM_OBJECT_NULL) ||
6604                       (object->internal && !object->true_share))) ||
6605                     entry->needs_copy) {
6606                         vm_object_t     old_object = entry->object.vm_object;
6607                         vm_object_offset_t      old_offset = entry->offset;
6608                         vm_object_offset_t      offset;
6609
6610                         /*
6611                          * Ensure that the source and destination aren't
6612                          * identical
6613                          */
6614                         if (old_object == copy_entry->object.vm_object &&
6615                             old_offset == copy_entry->offset) {
6616                                 vm_map_copy_entry_unlink(copy, copy_entry);
6617                                 vm_map_copy_entry_dispose(copy, copy_entry);
6618
6619                                 if (old_object != VM_OBJECT_NULL)
6620                                         vm_object_deallocate(old_object);
6621
6622                                 start = tmp_entry->vme_end;
6623                                 tmp_entry = tmp_entry->vme_next;
6624                                 continue;
6625                         }
6626
6627                         if (old_object != VM_OBJECT_NULL) {
6628                                 if(entry->is_sub_map) {
6629                                         if(entry->use_pmap) {
6630 #ifndef NO_NESTED_PMAP
6631                                                 pmap_unnest(dst_map->pmap,
6632                                                             (addr64_t)entry->vme_start,
6633                                                             entry->vme_end - entry->vme_start);
6634 #endif  /* NO_NESTED_PMAP */
6635                                                 if(dst_map->mapped) {
6636                                                         /* clean up parent */
6637                                                         /* map/maps */
6638                                                         vm_map_submap_pmap_clean(
6639                                                                 dst_map, entry->vme_start,
6640                                                                 entry->vme_end,
6641                                                                 entry->object.sub_map,
6642                                                                 entry->offset);
6643                                                 }
6644                                         } else {
6645                                                 vm_map_submap_pmap_clean(
6646                                                         dst_map, entry->vme_start,
6647                                                         entry->vme_end,
6648                                                         entry->object.sub_map,
6649                                                         entry->offset);
6650                                         }
6651                                         vm_map_deallocate(
6652                                                 entry->object.sub_map);
6653                                 } else {
6654                                         if(dst_map->mapped) {
6655                                                 vm_object_pmap_protect(
6656                                                         entry->object.vm_object,
6657                                                         entry->offset,
6658                                                         entry->vme_end
6659                                                         - entry->vme_start,
6660                                                         PMAP_NULL,
6661                                                         entry->vme_start,
6662                                                         VM_PROT_NONE);
6663                                         } else {
6664                                                 pmap_remove(dst_map->pmap,
6665                                                             (addr64_t)(entry->vme_start),
6666                                                             (addr64_t)(entry->vme_end));
6667                                         }
6668                                         vm_object_deallocate(old_object);
6669                                 }
6670                         }
6671
6672                         entry->is_sub_map = FALSE;
6673                         entry->object = copy_entry->object;
6674                         object = entry->object.vm_object;
6675                         entry->needs_copy = copy_entry->needs_copy;
6676                         entry->wired_count = 0;
6677                         entry->user_wired_count = 0;
6678                         offset = entry->offset = copy_entry->offset;
6679
6680                         vm_map_copy_entry_unlink(copy, copy_entry);
6681                         vm_map_copy_entry_dispose(copy, copy_entry);
6682
6683                         /*
6684                          * we could try to push pages into the pmap at this point, BUT
6685                          * this optimization only saved on average 2 us per page if ALL
6686                          * the pages in the source were currently mapped
6687                          * and ALL the pages in the dest were touched, if there were fewer
6688                          * than 2/3 of the pages touched, this optimization actually cost more cycles
6689                          * it also puts a lot of pressure on the pmap layer w/r to mapping structures
6690                          */
6691
6692                         /*
6693                          *      Set up for the next iteration.  The map
6694                          *      has not been unlocked, so the next
6695                          *      address should be at the end of this
6696                          *      entry, and the next map entry should be
6697                          *      the one following it.
6698                          */
6699
6700                         start = tmp_entry->vme_end;
6701                         tmp_entry = tmp_entry->vme_next;
6702                 } else {
6703                         vm_map_version_t        version;
6704                         vm_object_t             dst_object = entry->object.vm_object;
6705                         vm_object_offset_t      dst_offset = entry->offset;
6706                         kern_return_t           r;
6707
6708                         /*
6709                          *      Take an object reference, and record
6710                          *      the map version information so that the
6711                          *      map can be safely unlocked.
6712                          */
6713
6714                         vm_object_reference(dst_object);
6715
6716                         /* account for unlock bumping up timestamp */
6717                         version.main_timestamp = dst_map->timestamp + 1;
6718
6719                         vm_map_unlock(dst_map);
6720
6721                         /*
6722                          *      Copy as much as possible in one pass
6723                          */
6724
6725                         copy_size = size;
6726                         r = vm_fault_copy(
6727                                 copy_entry->object.vm_object,
6728                                 copy_entry->offset,
6729                                 &copy_size,
6730                                 dst_object,
6731                                 dst_offset,
6732                                 dst_map,
6733                                 &version,
6734                                 THREAD_UNINT );
6735
6736                         /*
6737                          *      Release the object reference
6738                          */
6739
6740                         vm_object_deallocate(dst_object);
6741
6742                         /*
6743                          *      If a hard error occurred, return it now
6744                          */
6745
6746                         if (r != KERN_SUCCESS)
6747                                 return(r);
6748
6749                         if (copy_size != 0) {
6750                                 /*
6751                                  *      Dispose of the copied region
6752                                  */
6753
6754                                 vm_map_copy_clip_end(copy, copy_entry,
6755                                                      copy_entry->vme_start + copy_size);
6756                                 vm_map_copy_entry_unlink(copy, copy_entry);
6757                                 vm_object_deallocate(copy_entry->object.vm_object);
6758                                 vm_map_copy_entry_dispose(copy, copy_entry);
6759                         }
6760
6761                         /*
6762                          *      Pick up in the destination map where we left off.
6763                          *
6764                          *      Use the version information to avoid a lookup
6765                          *      in the normal case.
6766                          */
6767
6768                         start += copy_size;
6769                         vm_map_lock(dst_map);
6770                         if (version.main_timestamp == dst_map->timestamp) {
6771                                 /* We can safely use saved tmp_entry value */
6772
6773                                 vm_map_clip_end(dst_map, tmp_entry, start);
6774                                 tmp_entry = tmp_entry->vme_next;
6775                         } else {
6776                                 /* Must do lookup of tmp_entry */
6777
6778                                 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
6779                                         vm_map_unlock(dst_map);
6780                                         return(KERN_INVALID_ADDRESS);
6781                                 }
6782                                 vm_map_clip_start(dst_map, tmp_entry, start);
6783                         }
6784                 }
6785         }/* while */
6786
6787         return(KERN_SUCCESS);
6788 }/* vm_map_copy_overwrite_aligned */
6789
6790 /*
6791  *      Routine: vm_map_copyin_kernel_buffer [internal use only]
6792  *
6793  *      Description:
6794  *              Copy in data to a kernel buffer from space in the
6795  *              source map. The original space may be optionally
6796  *              deallocated.
6797  *
6798  *              If successful, returns a new copy object.
6799  */
6800 static kern_return_t
6801 vm_map_copyin_kernel_buffer(
6802         vm_map_t        src_map,
6803         vm_map_offset_t src_addr,
6804         vm_map_size_t   len,
6805         boolean_t       src_destroy,
6806         vm_map_copy_t   *copy_result)
6807 {
6808         kern_return_t kr;
6809         vm_map_copy_t copy;
6810         vm_size_t kalloc_size;
6811
6812         if ((vm_size_t) len != len) {
6813                 /* "len" is too big and doesn't fit in a "vm_size_t" */
6814                 return KERN_RESOURCE_SHORTAGE;
6815         }
6816         kalloc_size = (vm_size_t) (sizeof(struct vm_map_copy) + len);
6817         assert((vm_map_size_t) kalloc_size == sizeof (struct vm_map_copy) + len);
6818
6819         copy = (vm_map_copy_t) kalloc(kalloc_size);
6820         if (copy == VM_MAP_COPY_NULL) {
6821                 return KERN_RESOURCE_SHORTAGE;
6822         }
6823         copy->type = VM_MAP_COPY_KERNEL_BUFFER;
6824         copy->size = len;
6825         copy->offset = 0;
6826         copy->cpy_kdata = (void *) (copy + 1);
6827         copy->cpy_kalloc_size = kalloc_size;
6828
6829         kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t) len);
6830         if (kr != KERN_SUCCESS) {
6831                 kfree(copy, kalloc_size);
6832                 return kr;
6833         }
6834         if (src_destroy) {
6835                 (void) vm_map_remove(src_map, vm_map_trunc_page(src_addr),
6836                                      vm_map_round_page(src_addr + len),
6837                                      VM_MAP_REMOVE_INTERRUPTIBLE |
6838                                      VM_MAP_REMOVE_WAIT_FOR_KWIRE |
6839                                      (src_map == kernel_map) ?
6840                                      VM_MAP_REMOVE_KUNWIRE : 0);
6841         }
6842         *copy_result = copy;
6843         return KERN_SUCCESS;
6844 }
6845
6846 /*
6847  *      Routine: vm_map_copyout_kernel_buffer   [internal use only]
6848  *
6849  *      Description:
6850  *              Copy out data from a kernel buffer into space in the
6851  *              destination map. The space may be optionally dynamically
6852  *              allocated.
6853  *
6854  *              If successful, consumes the copy object.
6855  *              Otherwise, the caller is responsible for it.
6856  */
6857 static int vm_map_copyout_kernel_buffer_failures = 0;
6858 static kern_return_t
6859 vm_map_copyout_kernel_buffer(
6860         vm_map_t                map,
6861         vm_map_address_t        *addr,  /* IN/OUT */
6862         vm_map_copy_t           copy,
6863         boolean_t               overwrite)
6864 {
6865         kern_return_t kr = KERN_SUCCESS;
6866         thread_t thread = current_thread();
6867
6868         if (!overwrite) {
6869
6870                 /*
6871                  * Allocate space in the target map for the data
6872                  */
6873                 *addr = 0;
6874                 kr = vm_map_enter(map,
6875                                   addr,
6876                                   vm_map_round_page(copy->size),
6877                                   (vm_map_offset_t) 0,
6878                                   VM_FLAGS_ANYWHERE,
6879                                   VM_OBJECT_NULL,
6880                                   (vm_object_offset_t) 0,
6881                                   FALSE,
6882                                   VM_PROT_DEFAULT,
6883                                   VM_PROT_ALL,
6884                                   VM_INHERIT_DEFAULT);
6885                 if (kr != KERN_SUCCESS)
6886                         return kr;
6887         }
6888
6889         /*
6890          * Copyout the data from the kernel buffer to the target map.
6891          */
6892         if (thread->map == map) {
6893
6894                 /*
6895                  * If the target map is the current map, just do
6896                  * the copy.
6897                  */
6898                 assert((vm_size_t) copy->size == copy->size);
6899                 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
6900                         kr = KERN_INVALID_ADDRESS;
6901                 }
6902         }
6903         else {
6904                 vm_map_t oldmap;
6905
6906                 /*
6907                  * If the target map is another map, assume the
6908                  * target's address space identity for the duration
6909                  * of the copy.
6910                  */
6911                 vm_map_reference(map);
6912                 oldmap = vm_map_switch(map);
6913
6914                 assert((vm_size_t) copy->size == copy->size);
6915                 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
6916                         vm_map_copyout_kernel_buffer_failures++;
6917                         kr = KERN_INVALID_ADDRESS;
6918                 }
6919
6920                 (void) vm_map_switch(oldmap);
6921                 vm_map_deallocate(map);
6922         }
6923
6924         if (kr != KERN_SUCCESS) {
6925                 /* the copy failed, clean up */
6926                 if (!overwrite) {
6927                         /*
6928                          * Deallocate the space we allocated in the target map.
6929                          */
6930                         (void) vm_map_remove(map,
6931                                              vm_map_trunc_page(*addr),
6932                                              vm_map_round_page(*addr +
6933                                                                vm_map_round_page(copy->size)),
6934                                              VM_MAP_NO_FLAGS);
6935                         *addr = 0;
6936                 }
6937         } else {
6938                 /* copy was successful, dicard the copy structure */
6939                 kfree(copy, copy->cpy_kalloc_size);
6940         }
6941
6942         return kr;
6943 }
6944
6945 /*
6946  *      Macro:          vm_map_copy_insert
6947  *
6948  *      Description:
6949  *              Link a copy chain ("copy") into a map at the
6950  *              specified location (after "where").
6951  *      Side effects:
6952  *              The copy chain is destroyed.
6953  *      Warning:
6954  *              The arguments are evaluated multiple times.
6955  */
#define vm_map_copy_insert(map, where, copy)				\
MACRO_BEGIN								\
	/* link the copy chain into "map" after "where" */		\
	vm_map_store_copy_insert((map), (where), (copy));		\
	/* the copy object itself is no longer needed: free it */	\
	zfree(vm_map_copy_zone, (copy));				\
MACRO_END
6961
6962 /*
6963  *      Routine:        vm_map_copyout
6964  *
6965  *      Description:
6966  *              Copy out a copy chain ("copy") into newly-allocated
6967  *              space in the destination map.
6968  *
6969  *              If successful, consumes the copy object.
6970  *              Otherwise, the caller is responsible for it.
6971  */
6972 kern_return_t
6973 vm_map_copyout(
6974         vm_map_t                dst_map,
6975         vm_map_address_t        *dst_addr,      /* OUT */
6976         vm_map_copy_t           copy)
6977 {
6978         vm_map_size_t           size;
6979         vm_map_size_t           adjustment;
6980         vm_map_offset_t         start;
6981         vm_object_offset_t      vm_copy_start;
6982         vm_map_entry_t          last;
6983         register
6984         vm_map_entry_t          entry;
6985
6986         /*
6987          *      Check for null copy object.
6988          */
6989
6990         if (copy == VM_MAP_COPY_NULL) {
6991                 *dst_addr = 0;
6992                 return(KERN_SUCCESS);
6993         }
6994
6995         /*
6996          *      Check for special copy object, created
6997          *      by vm_map_copyin_object.
6998          */
6999
7000         if (copy->type == VM_MAP_COPY_OBJECT) {
7001                 vm_object_t             object = copy->cpy_object;
7002                 kern_return_t           kr;
7003                 vm_object_offset_t      offset;
7004
7005                 offset = vm_object_trunc_page(copy->offset);
7006                 size = vm_map_round_page(copy->size +
7007                                          (vm_map_size_t)(copy->offset - offset));
7008                 *dst_addr = 0;
7009                 kr = vm_map_enter(dst_map, dst_addr, size,
7010                                   (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
7011                                   object, offset, FALSE,
7012                                   VM_PROT_DEFAULT, VM_PROT_ALL,
7013                                   VM_INHERIT_DEFAULT);
7014                 if (kr != KERN_SUCCESS)
7015                         return(kr);
7016                 /* Account for non-pagealigned copy object */
7017                 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
7018                 zfree(vm_map_copy_zone, copy);
7019                 return(KERN_SUCCESS);
7020         }
7021
7022         /*
7023          *      Check for special kernel buffer allocated
7024          *      by new_ipc_kmsg_copyin.
7025          */
7026
7027         if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
7028                 return(vm_map_copyout_kernel_buffer(dst_map, dst_addr,
7029                                                     copy, FALSE));
7030         }
7031
7032         /*
7033          *      Find space for the data
7034          */
7035
7036         vm_copy_start = vm_object_trunc_page(copy->offset);
7037         size =  vm_map_round_page((vm_map_size_t)copy->offset + copy->size)
7038                 - vm_copy_start;
7039
7040 StartAgain: ;
7041
7042         vm_map_lock(dst_map);
7043         if( dst_map->disable_vmentry_reuse == TRUE) {
7044                 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
7045                 last = entry;
7046         } else {
7047                 assert(first_free_is_valid(dst_map));
7048                 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
7049                 vm_map_min(dst_map) : last->vme_end;
7050         }
7051
7052         while (TRUE) {
7053                 vm_map_entry_t  next = last->vme_next;
7054                 vm_map_offset_t end = start + size;
7055
7056                 if ((end > dst_map->max_offset) || (end < start)) {
7057                         if (dst_map->wait_for_space) {
7058                                 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
7059                                         assert_wait((event_t) dst_map,
7060                                                     THREAD_INTERRUPTIBLE);
7061                                         vm_map_unlock(dst_map);
7062                                         thread_block(THREAD_CONTINUE_NULL);
7063                                         goto StartAgain;
7064                                 }
7065                         }
7066                         vm_map_unlock(dst_map);
7067                         return(KERN_NO_SPACE);
7068                 }
7069
7070                 if ((next == vm_map_to_entry(dst_map)) ||
7071                     (next->vme_start >= end))
7072                         break;
7073
7074                 last = next;
7075                 start = last->vme_end;
7076         }
7077
7078         /*
7079          *      Since we're going to just drop the map
7080          *      entries from the copy into the destination
7081          *      map, they must come from the same pool.
7082          */
7083
7084         if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
7085                 /*
7086                  * Mismatches occur when dealing with the default
7087                  * pager.
7088                  */
7089                 zone_t          old_zone;
7090                 vm_map_entry_t  next, new;
7091
7092                 /*
7093                  * Find the zone that the copies were allocated from
7094                  */
7095                 old_zone = (copy->cpy_hdr.entries_pageable)
7096                         ? vm_map_entry_zone
7097                         : vm_map_kentry_zone;
7098                 entry = vm_map_copy_first_entry(copy);
7099
7100                 /*
7101                  * Reinitialize the copy so that vm_map_copy_entry_link
7102                  * will work.
7103                  */
7104                 vm_map_store_copy_reset(copy, entry);
7105                 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
7106
7107                 /*
7108                  * Copy each entry.
7109                  */
7110                 while (entry != vm_map_copy_to_entry(copy)) {
7111                         new = vm_map_copy_entry_create(copy);
7112                         vm_map_entry_copy_full(new, entry);
7113                         new->use_pmap = FALSE;  /* clr address space specifics */
7114                         vm_map_copy_entry_link(copy,
7115                                                vm_map_copy_last_entry(copy),
7116                                                new);
7117                         next = entry->vme_next;
7118                         zfree(old_zone, entry);
7119                         entry = next;
7120                 }
7121         }
7122
7123         /*
7124          *      Adjust the addresses in the copy chain, and
7125          *      reset the region attributes.
7126          */
7127
7128         adjustment = start - vm_copy_start;
7129         for (entry = vm_map_copy_first_entry(copy);
7130              entry != vm_map_copy_to_entry(copy);
7131              entry = entry->vme_next) {
7132                 entry->vme_start += adjustment;
7133                 entry->vme_end += adjustment;
7134
7135                 entry->inheritance = VM_INHERIT_DEFAULT;
7136                 entry->protection = VM_PROT_DEFAULT;
7137                 entry->max_protection = VM_PROT_ALL;
7138                 entry->behavior = VM_BEHAVIOR_DEFAULT;
7139
7140                 /*
7141                  * If the entry is now wired,
7142                  * map the pages into the destination map.
7143                  */
7144                 if (entry->wired_count != 0) {
7145                         register vm_map_offset_t va;
7146                         vm_object_offset_t       offset;
7147                         register vm_object_t object;
7148                         vm_prot_t prot;
7149                         int     type_of_fault;
7150
7151                         object = entry->object.vm_object;
7152                         offset = entry->offset;
7153                         va = entry->vme_start;
7154
7155                         pmap_pageable(dst_map->pmap,
7156                                       entry->vme_start,
7157                                       entry->vme_end,
7158                                       TRUE);
7159
7160                         while (va < entry->vme_end) {
7161                                 register vm_page_t      m;
7162
7163                                 /*
7164                                  * Look up the page in the object.
7165                                  * Assert that the page will be found in the
7166                                  * top object:
7167                                  * either
7168                                  *      the object was newly created by
7169                                  *      vm_object_copy_slowly, and has
7170                                  *      copies of all of the pages from
7171                                  *      the source object
7172                                  * or
7173                                  *      the object was moved from the old
7174                                  *      map entry; because the old map
7175                                  *      entry was wired, all of the pages
7176                                  *      were in the top-level object.
7177                                  *      (XXX not true if we wire pages for
7178                                  *       reading)
7179                                  */
7180                                 vm_object_lock(object);
7181
7182                                 m = vm_page_lookup(object, offset);
7183                                 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
7184                                     m->absent)
7185                                         panic("vm_map_copyout: wiring %p", m);
7186
7187                                 /*
7188                                  * ENCRYPTED SWAP:
7189                                  * The page is assumed to be wired here, so it
7190                                  * shouldn't be encrypted.  Otherwise, we
7191                                  * couldn't enter it in the page table, since
7192                                  * we don't want the user to see the encrypted
7193                                  * data.
7194                                  */
7195                                 ASSERT_PAGE_DECRYPTED(m);
7196
7197                                 prot = entry->protection;
7198
7199                                 if (override_nx(dst_map, entry->alias) && prot)
7200                                         prot |= VM_PROT_EXECUTE;
7201
7202                                 type_of_fault = DBG_CACHE_HIT_FAULT;
7203
7204                                 vm_fault_enter(m, dst_map->pmap, va, prot, prot,
7205                                                VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
7206                                                &type_of_fault);
7207
7208                                 vm_object_unlock(object);
7209
7210                                 offset += PAGE_SIZE_64;
7211                                 va += PAGE_SIZE;
7212                         }
7213                 }
7214         }
7215
7216         /*
7217          *      Correct the page alignment for the result
7218          */
7219
7220         *dst_addr = start + (copy->offset - vm_copy_start);
7221
7222         /*
7223          *      Update the hints and the map size
7224          */
7225
7226         SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
7227
7228         dst_map->size += size;
7229
7230         /*
7231          *      Link in the copy
7232          */
7233
7234         vm_map_copy_insert(dst_map, last, copy);
7235
7236         vm_map_unlock(dst_map);
7237
7238         /*
7239          * XXX  If wiring_required, call vm_map_pageable
7240          */
7241
7242         return(KERN_SUCCESS);
7243 }
7244
7245 /*
7246  *      Routine:        vm_map_copyin
7247  *
7248  *      Description:
7249  *              see vm_map_copyin_common.  Exported via Unsupported.exports.
7250  *
7251  */
7252
7253 #undef vm_map_copyin
7254
7255 kern_return_t
7256 vm_map_copyin(
7257         vm_map_t                        src_map,
7258         vm_map_address_t        src_addr,
7259         vm_map_size_t           len,
7260         boolean_t                       src_destroy,
7261         vm_map_copy_t           *copy_result)   /* OUT */
7262 {
7263         return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
7264                                         FALSE, copy_result, FALSE));
7265 }
7266
7267 /*
7268  *      Routine:        vm_map_copyin_common
7269  *
7270  *      Description:
7271  *              Copy the specified region (src_addr, len) from the
7272  *              source address space (src_map), possibly removing
7273  *              the region from the source address space (src_destroy).
7274  *
7275  *      Returns:
7276  *              A vm_map_copy_t object (copy_result), suitable either for
7277  *              insertion into another address space (using vm_map_copyout)
7278  *              or for copying over another address space region (using
7279  *              vm_map_copy_overwrite).  If the copy is unused, it
7280  *              should be destroyed (using vm_map_copy_discard).
7281  *
7282  *      In/out conditions:
7283  *              The source map should not be locked on entry.
7284  */
7285
7286 typedef struct submap_map {
7287         vm_map_t        parent_map;
7288         vm_map_offset_t base_start;
7289         vm_map_offset_t base_end;
7290         vm_map_size_t   base_len;
7291         struct submap_map *next;
7292 } submap_map_t;
7293
7294 kern_return_t
7295 vm_map_copyin_common(
7296         vm_map_t        src_map,
7297         vm_map_address_t src_addr,
7298         vm_map_size_t   len,
7299         boolean_t       src_destroy,
7300         __unused boolean_t      src_volatile,
7301         vm_map_copy_t   *copy_result,   /* OUT */
7302         boolean_t       use_maxprot)
7303 {
7304         vm_map_entry_t  tmp_entry;      /* Result of last map lookup --
7305                                          * in multi-level lookup, this
7306                                          * entry contains the actual
7307                                          * vm_object/offset.
7308                                          */
7309         register
7310         vm_map_entry_t  new_entry = VM_MAP_ENTRY_NULL;  /* Map entry for copy */
7311
7312         vm_map_offset_t src_start;      /* Start of current entry --
7313                                          * where copy is taking place now
7314                                          */
7315         vm_map_offset_t src_end;        /* End of entire region to be
7316                                          * copied */
7317         vm_map_offset_t src_base;
7318         vm_map_t        base_map = src_map;
7319         boolean_t       map_share=FALSE;
7320         submap_map_t    *parent_maps = NULL;
7321
7322         register
7323         vm_map_copy_t   copy;           /* Resulting copy */
7324         vm_map_address_t        copy_addr;
7325
7326         /*
7327          *      Check for copies of zero bytes.
7328          */
7329
7330         if (len == 0) {
7331                 *copy_result = VM_MAP_COPY_NULL;
7332                 return(KERN_SUCCESS);
7333         }
7334
7335         /*
7336          *      Check that the end address doesn't overflow
7337          */
7338         src_end = src_addr + len;
7339         if (src_end < src_addr)
7340                 return KERN_INVALID_ADDRESS;
7341
7342         /*
7343          * If the copy is sufficiently small, use a kernel buffer instead
7344          * of making a virtual copy.  The theory being that the cost of
7345          * setting up VM (and taking C-O-W faults) dominates the copy costs
7346          * for small regions.
7347          */
7348         if ((len < msg_ool_size_small) && !use_maxprot)
7349                 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
7350                                                    src_destroy, copy_result);
7351
7352         /*
7353          *      Compute (page aligned) start and end of region
7354          */
7355         src_start = vm_map_trunc_page(src_addr);
7356         src_end = vm_map_round_page(src_end);
7357
7358         XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);
7359
7360         /*
7361          *      Allocate a header element for the list.
7362          *
7363          *      Use the start and end in the header to
7364          *      remember the endpoints prior to rounding.
7365          */
7366
7367         copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7368         vm_map_copy_first_entry(copy) =
7369                 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
7370         copy->type = VM_MAP_COPY_ENTRY_LIST;
7371         copy->cpy_hdr.nentries = 0;
7372         copy->cpy_hdr.entries_pageable = TRUE;
7373
7374         vm_map_store_init( &(copy->cpy_hdr) );
7375
7376         copy->offset = src_addr;
7377         copy->size = len;
7378
7379         new_entry = vm_map_copy_entry_create(copy);
7380
7381 #define RETURN(x)                                               \
7382         MACRO_BEGIN                                             \
7383         vm_map_unlock(src_map);                                 \
7384         if(src_map != base_map)                                 \
7385                 vm_map_deallocate(src_map);                     \
7386         if (new_entry != VM_MAP_ENTRY_NULL)                     \
7387                 vm_map_copy_entry_dispose(copy,new_entry);      \
7388         vm_map_copy_discard(copy);                              \
7389         {                                                       \
7390                 submap_map_t    *_ptr;                          \
7391                                                                 \
7392                 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
7393                         parent_maps=parent_maps->next;          \
7394                         if (_ptr->parent_map != base_map)       \
7395                                 vm_map_deallocate(_ptr->parent_map);    \
7396                         kfree(_ptr, sizeof(submap_map_t));      \
7397                 }                                               \
7398         }                                                       \
7399         MACRO_RETURN(x);                                        \
7400         MACRO_END
7401
7402         /*
7403          *      Find the beginning of the region.
7404          */
7405
7406         vm_map_lock(src_map);
7407
7408         if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry))
7409                 RETURN(KERN_INVALID_ADDRESS);
7410         if(!tmp_entry->is_sub_map) {
7411                 vm_map_clip_start(src_map, tmp_entry, src_start);
7412         }
7413         /* set for later submap fix-up */
7414         copy_addr = src_start;
7415
7416         /*
7417          *      Go through entries until we get to the end.
7418          */
7419
7420         while (TRUE) {
7421                 register
7422                 vm_map_entry_t  src_entry = tmp_entry;  /* Top-level entry */
7423                 vm_map_size_t   src_size;               /* Size of source
7424                                                          * map entry (in both
7425                                                          * maps)
7426                                                          */
7427
7428                 register
7429                 vm_object_t             src_object;     /* Object to copy */
7430                 vm_object_offset_t      src_offset;
7431
7432                 boolean_t       src_needs_copy;         /* Should source map
7433                                                          * be made read-only
7434                                                          * for copy-on-write?
7435                                                          */
7436
7437                 boolean_t       new_entry_needs_copy;   /* Will new entry be COW? */
7438
7439                 boolean_t       was_wired;              /* Was source wired? */
7440                 vm_map_version_t version;               /* Version before locks
7441                                                          * dropped to make copy
7442                                                          */
7443                 kern_return_t   result;                 /* Return value from
7444                                                          * copy_strategically.
7445                                                          */
7446                 while(tmp_entry->is_sub_map) {
7447                         vm_map_size_t submap_len;
7448                         submap_map_t *ptr;
7449
7450                         ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
7451                         ptr->next = parent_maps;
7452                         parent_maps = ptr;
7453                         ptr->parent_map = src_map;
7454                         ptr->base_start = src_start;
7455                         ptr->base_end = src_end;
7456                         submap_len = tmp_entry->vme_end - src_start;
7457                         if(submap_len > (src_end-src_start))
7458                                 submap_len = src_end-src_start;
7459                         ptr->base_len = submap_len;
7460
7461                         src_start -= tmp_entry->vme_start;
7462                         src_start += tmp_entry->offset;
7463                         src_end = src_start + submap_len;
7464                         src_map = tmp_entry->object.sub_map;
7465                         vm_map_lock(src_map);
7466                         /* keep an outstanding reference for all maps in */
7467                         /* the parents tree except the base map */
7468                         vm_map_reference(src_map);
7469                         vm_map_unlock(ptr->parent_map);
7470                         if (!vm_map_lookup_entry(
7471                                     src_map, src_start, &tmp_entry))
7472                                 RETURN(KERN_INVALID_ADDRESS);
7473                         map_share = TRUE;
7474                         if(!tmp_entry->is_sub_map)
7475                                 vm_map_clip_start(src_map, tmp_entry, src_start);
7476                         src_entry = tmp_entry;
7477                 }
7478                 /* we are now in the lowest level submap... */
7479
7480                 if ((tmp_entry->object.vm_object != VM_OBJECT_NULL) &&
7481                     (tmp_entry->object.vm_object->phys_contiguous)) {
7482                         /* This is not supported for now. In future */
7483                         /* we will need to detect the phys_contig   */
7484                         /* condition and then upgrade copy_slowly   */
7485                         /* to do physical copy from the device mem  */
7486                         /* based object. We can piggy-back off of   */
7487                         /* the was wired boolean to set-up the      */
7488                         /* proper handling */
7489                         RETURN(KERN_PROTECTION_FAILURE);
7490                 }
7491                 /*
7492                  *      Create a new address map entry to hold the result.
7493                  *      Fill in the fields from the appropriate source entries.
7494                  *      We must unlock the source map to do this if we need
7495                  *      to allocate a map entry.
7496                  */
7497                 if (new_entry == VM_MAP_ENTRY_NULL) {
7498                         version.main_timestamp = src_map->timestamp;
7499                         vm_map_unlock(src_map);
7500
7501                         new_entry = vm_map_copy_entry_create(copy);
7502
7503                         vm_map_lock(src_map);
7504                         if ((version.main_timestamp + 1) != src_map->timestamp) {
7505                                 if (!vm_map_lookup_entry(src_map, src_start,
7506                                                          &tmp_entry)) {
7507                                         RETURN(KERN_INVALID_ADDRESS);
7508                                 }
7509                                 if (!tmp_entry->is_sub_map)
7510                                         vm_map_clip_start(src_map, tmp_entry, src_start);
7511                                 continue; /* restart w/ new tmp_entry */
7512                         }
7513                 }
7514
7515                 /*
7516                  *      Verify that the region can be read.
7517                  */
7518                 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
7519                      !use_maxprot) ||
7520                     (src_entry->max_protection & VM_PROT_READ) == 0)
7521                         RETURN(KERN_PROTECTION_FAILURE);
7522
7523                 /*
7524                  *      Clip against the endpoints of the entire region.
7525                  */
7526
7527                 vm_map_clip_end(src_map, src_entry, src_end);
7528
7529                 src_size = src_entry->vme_end - src_start;
7530                 src_object = src_entry->object.vm_object;
7531                 src_offset = src_entry->offset;
7532                 was_wired = (src_entry->wired_count != 0);
7533
7534                 vm_map_entry_copy(new_entry, src_entry);
7535                 new_entry->use_pmap = FALSE; /* clr address space specifics */
7536
7537                 /*
7538                  *      Attempt non-blocking copy-on-write optimizations.
7539                  */
7540
7541                 if (src_destroy &&
7542                     (src_object == VM_OBJECT_NULL ||
7543                      (src_object->internal && !src_object->true_share
7544                       && !map_share))) {
7545                         /*
7546                          * If we are destroying the source, and the object
7547                          * is internal, we can move the object reference
7548                          * from the source to the copy.  The copy is
7549                          * copy-on-write only if the source is.
7550                          * We make another reference to the object, because
7551                          * destroying the source entry will deallocate it.
7552                          */
7553                         vm_object_reference(src_object);
7554
7555                         /*
7556                          * Copy is always unwired.  vm_map_copy_entry
7557                          * set its wired count to zero.
7558                          */
7559
7560                         goto CopySuccessful;
7561                 }
7562
7563
7564         RestartCopy:
7565                 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
7566                     src_object, new_entry, new_entry->object.vm_object,
7567                     was_wired, 0);
7568                 if ((src_object == VM_OBJECT_NULL ||
7569                      (!was_wired && !map_share && !tmp_entry->is_shared)) &&
7570                     vm_object_copy_quickly(
7571                             &new_entry->object.vm_object,
7572                             src_offset,
7573                             src_size,
7574                             &src_needs_copy,
7575                             &new_entry_needs_copy)) {
7576
7577                         new_entry->needs_copy = new_entry_needs_copy;
7578
7579                         /*
7580                          *      Handle copy-on-write obligations
7581                          */
7582
7583                         if (src_needs_copy && !tmp_entry->needs_copy) {
7584                                 vm_prot_t prot;
7585
7586                                 prot = src_entry->protection & ~VM_PROT_WRITE;
7587
7588                                 if (override_nx(src_map, src_entry->alias) && prot)
7589                                         prot |= VM_PROT_EXECUTE;
7590
7591                                 vm_object_pmap_protect(
7592                                         src_object,
7593                                         src_offset,
7594                                         src_size,
7595                                         (src_entry->is_shared ?
7596                                          PMAP_NULL
7597                                          : src_map->pmap),
7598                                         src_entry->vme_start,
7599                                         prot);
7600
7601                                 tmp_entry->needs_copy = TRUE;
7602                         }
7603
7604                         /*
7605                          *      The map has never been unlocked, so it's safe
7606                          *      to move to the next entry rather than doing
7607                          *      another lookup.
7608                          */
7609
7610                         goto CopySuccessful;
7611                 }
7612
7613                 /*
7614                  *      Take an object reference, so that we may
7615                  *      release the map lock(s).
7616                  */
7617
7618                 assert(src_object != VM_OBJECT_NULL);
7619                 vm_object_reference(src_object);
7620
7621                 /*
7622                  *      Record the timestamp for later verification.
7623                  *      Unlock the map.
7624                  */
7625
7626                 version.main_timestamp = src_map->timestamp;
7627                 vm_map_unlock(src_map); /* Increments timestamp once! */
7628
7629                 /*
7630                  *      Perform the copy
7631                  */
7632
7633                 if (was_wired) {
7634                 CopySlowly:
7635                         vm_object_lock(src_object);
7636                         result = vm_object_copy_slowly(
7637                                 src_object,
7638                                 src_offset,
7639                                 src_size,
7640                                 THREAD_UNINT,
7641                                 &new_entry->object.vm_object);
7642                         new_entry->offset = 0;
7643                         new_entry->needs_copy = FALSE;
7644
7645                 }
7646                 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
7647                          (tmp_entry->is_shared  || map_share)) {
7648                         vm_object_t new_object;
7649
7650                         vm_object_lock_shared(src_object);
7651                         new_object = vm_object_copy_delayed(
7652                                 src_object,
7653                                 src_offset,
7654                                 src_size,
7655                                 TRUE);
7656                         if (new_object == VM_OBJECT_NULL)
7657                                 goto CopySlowly;
7658
7659                         new_entry->object.vm_object = new_object;
7660                         new_entry->needs_copy = TRUE;
7661                         result = KERN_SUCCESS;
7662
7663                 } else {
7664                         result = vm_object_copy_strategically(src_object,
7665                                                               src_offset,
7666                                                               src_size,
7667                                                               &new_entry->object.vm_object,
7668                                                               &new_entry->offset,
7669                                                               &new_entry_needs_copy);
7670
7671                         new_entry->needs_copy = new_entry_needs_copy;
7672                 }
7673
7674                 if (result != KERN_SUCCESS &&
7675                     result != KERN_MEMORY_RESTART_COPY) {
7676                         vm_map_lock(src_map);
7677                         RETURN(result);
7678                 }
7679
7680                 /*
7681                  *      Throw away the extra reference
7682                  */
7683
7684                 vm_object_deallocate(src_object);
7685
7686                 /*
7687                  *      Verify that the map has not substantially
7688                  *      changed while the copy was being made.
7689                  */
7690
7691                 vm_map_lock(src_map);
7692
7693                 if ((version.main_timestamp + 1) == src_map->timestamp)
7694                         goto VerificationSuccessful;
7695
7696                 /*
7697                  *      Simple version comparison failed.
7698                  *
7699                  *      Retry the lookup and verify that the
7700                  *      same object/offset are still present.
7701                  *
7702                  *      [Note: a memory manager that colludes with
7703                  *      the calling task can detect that we have
7704                  *      cheated.  While the map was unlocked, the
7705                  *      mapping could have been changed and restored.]
7706                  */
7707
7708                 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
7709                         RETURN(KERN_INVALID_ADDRESS);
7710                 }
7711
7712                 src_entry = tmp_entry;
7713                 vm_map_clip_start(src_map, src_entry, src_start);
7714
7715                 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
7716                      !use_maxprot) ||
7717                     ((src_entry->max_protection & VM_PROT_READ) == 0))
7718                         goto VerificationFailed;
7719
7720                 if (src_entry->vme_end < new_entry->vme_end)
7721                         src_size = (new_entry->vme_end = src_entry->vme_end) - src_start;
7722
7723                 if ((src_entry->object.vm_object != src_object) ||
7724                     (src_entry->offset != src_offset) ) {
7725
7726                         /*
7727                          *      Verification failed.
7728                          *
7729                          *      Start over with this top-level entry.
7730                          */
7731
7732                 VerificationFailed: ;
7733
7734                         vm_object_deallocate(new_entry->object.vm_object);
7735                         tmp_entry = src_entry;
7736                         continue;
7737                 }
7738
7739                 /*
7740                  *      Verification succeeded.
7741                  */
7742
7743         VerificationSuccessful: ;
7744
7745                 if (result == KERN_MEMORY_RESTART_COPY)
7746                         goto RestartCopy;
7747
7748                 /*
7749                  *      Copy succeeded.
7750                  */
7751
7752         CopySuccessful: ;
7753
7754                 /*
7755                  *      Link in the new copy entry.
7756                  */
7757
7758                 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
7759                                        new_entry);
7760
7761                 /*
7762                  *      Determine whether the entire region
7763                  *      has been copied.
7764                  */
7765                 src_base = src_start;
7766                 src_start = new_entry->vme_end;
7767                 new_entry = VM_MAP_ENTRY_NULL;
7768                 while ((src_start >= src_end) && (src_end != 0)) {
7769                         if (src_map != base_map) {
7770                                 submap_map_t    *ptr;
7771
7772                                 ptr = parent_maps;
7773                                 assert(ptr != NULL);
7774                                 parent_maps = parent_maps->next;
7775
7776                                 /* fix up the damage we did in that submap */
7777                                 vm_map_simplify_range(src_map,
7778                                                       src_base,
7779                                                       src_end);
7780
7781                                 vm_map_unlock(src_map);
7782                                 vm_map_deallocate(src_map);
7783                                 vm_map_lock(ptr->parent_map);
7784                                 src_map = ptr->parent_map;
7785                                 src_base = ptr->base_start;
7786                                 src_start = ptr->base_start + ptr->base_len;
7787                                 src_end = ptr->base_end;
7788                                 if ((src_end > src_start) &&
7789                                     !vm_map_lookup_entry(
7790                                             src_map, src_start, &tmp_entry))
7791                                         RETURN(KERN_INVALID_ADDRESS);
7792                                 kfree(ptr, sizeof(submap_map_t));
7793                                 if(parent_maps == NULL)
7794                                         map_share = FALSE;
7795                                 src_entry = tmp_entry->vme_prev;
7796                         } else
7797                                 break;
7798                 }
7799                 if ((src_start >= src_end) && (src_end != 0))
7800                         break;
7801
7802                 /*
7803                  *      Verify that there are no gaps in the region
7804                  */
7805
7806                 tmp_entry = src_entry->vme_next;
7807                 if ((tmp_entry->vme_start != src_start) ||
7808                     (tmp_entry == vm_map_to_entry(src_map)))
7809                         RETURN(KERN_INVALID_ADDRESS);
7810         }
7811
7812         /*
7813          * If the source should be destroyed, do it now, since the
7814          * copy was successful.
7815          */
7816         if (src_destroy) {
7817                 (void) vm_map_delete(src_map,
7818                                      vm_map_trunc_page(src_addr),
7819                                      src_end,
7820                                      (src_map == kernel_map) ?
7821                                      VM_MAP_REMOVE_KUNWIRE :
7822                                      VM_MAP_NO_FLAGS,
7823                                      VM_MAP_NULL);
7824         } else {
7825                 /* fix up the damage we did in the base map */
7826                 vm_map_simplify_range(src_map,
7827                                       vm_map_trunc_page(src_addr),
7828                                       vm_map_round_page(src_end));
7829         }
7830
7831         vm_map_unlock(src_map);
7832
7833         /* Fix-up start and end points in copy.  This is necessary */
7834         /* when the various entries in the copy object were picked */
7835         /* up from different sub-maps */
7836
7837         tmp_entry = vm_map_copy_first_entry(copy);
7838         while (tmp_entry != vm_map_copy_to_entry(copy)) {
7839                 tmp_entry->vme_end = copy_addr +
7840                         (tmp_entry->vme_end - tmp_entry->vme_start);
7841                 tmp_entry->vme_start = copy_addr;
7842                 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
7843                 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
7844         }
7845
7846         *copy_result = copy;
7847         return(KERN_SUCCESS);
7848
7849 #undef  RETURN
7850 }
7851
7852 /*
7853  *      vm_map_copyin_object:
7854  *
7855  *      Create a VM_MAP_COPY_OBJECT copy that holds "object" directly and
7856  *      return it in "copy_result".  Our caller donates an object reference.
7857  */
7858
7859 kern_return_t
7860 vm_map_copyin_object(
7861         vm_object_t             object,
7862         vm_object_offset_t      offset, /* offset of region in object */
7863         vm_object_size_t        size,   /* size of region in object */
7864         vm_map_copy_t   *copy_result)   /* OUT */
7865 {
7866         vm_map_copy_t   copy;           /* Resulting copy */
7867
7868         /*
7869          *      We drop the object into a special copy object that
7870          *      contains the object directly; only four fields are set.
7871          */
7872
7873         copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);        /* NOTE(review): result is used unchecked -- confirm zalloc cannot fail here */
7874         copy->type = VM_MAP_COPY_OBJECT;
7875         copy->cpy_object = object;
7876         copy->offset = offset;
7877         copy->size = size;
7878
7879         *copy_result = copy;
7880         return(KERN_SUCCESS);   /* no failure path in this routine */
7881 }
7882
7883 static void
7884 vm_map_fork_share(
7885         vm_map_t        old_map,        /* map being forked */
7886         vm_map_entry_t  old_entry,      /* entry to share; updated in place */
7887         vm_map_t        new_map)        /* receives a clone of old_entry at its tail */
7888 {
7889         vm_object_t     object;
7890         vm_map_entry_t  new_entry;
7891
7892         /*
7893          *      Share "old_entry" with "new_map": the new map
7894          *      entry references the original object.  Internal
7895          *      objects use an asymmetric copy strategy for
7896          *      future copies.  First make sure we have
7897          *      the right object.  If we need a shadow,
7898          *      or someone else already has one, then
7899          *      make a new shadow and share it.
7900          */
7901
7902         object = old_entry->object.vm_object;
7903         if (old_entry->is_sub_map) {
7904                 assert(old_entry->wired_count == 0);
7905 #ifndef NO_NESTED_PMAP
7906                 if(old_entry->use_pmap) {
7907                         kern_return_t   result;
7908
7909                         result = pmap_nest(new_map->pmap,
7910                                            (old_entry->object.sub_map)->pmap,
7911                                            (addr64_t)old_entry->vme_start,
7912                                            (addr64_t)old_entry->vme_start,
7913                                            (uint64_t)(old_entry->vme_end - old_entry->vme_start));
7914                         if(result)
7915                                 panic("vm_map_fork_share: pmap_nest failed!");
7916                 }
7917 #endif  /* NO_NESTED_PMAP */
7918         } else if (object == VM_OBJECT_NULL) {
7919                 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
7920                                                             old_entry->vme_start));
7921                 old_entry->offset = 0;
7922                 old_entry->object.vm_object = object;
7923                 assert(!old_entry->needs_copy);
7924         } else if (object->copy_strategy !=
7925                    MEMORY_OBJECT_COPY_SYMMETRIC) {
7926
7927                 /*
7928                  *      We are already using an asymmetric
7929                  *      copy, and therefore we already have
7930                  *      the right object.
7931                  */
7932
7933                 assert(! old_entry->needs_copy);
7934         }
7935         else if (old_entry->needs_copy ||       /* case 1 */
7936                  object->shadowed ||            /* case 2 */
7937                  (!object->true_share &&        /* case 3 */
7938                   !old_entry->is_shared &&
7939                   (object->vo_size >
7940                    (vm_map_size_t)(old_entry->vme_end -
7941                                    old_entry->vme_start)))) {
7942
7943                 /*
7944                  *      We need to create a shadow.
7945                  *      There are three cases here.
7946                  *      In the first case, we need to
7947                  *      complete a deferred symmetrical
7948                  *      copy that we participated in.
7949                  *      In the second and third cases,
7950                  *      we need to create the shadow so
7951                  *      that changes that we make to the
7952                  *      object do not interfere with
7953                  *      any symmetrical copies which
7954                  *      have occurred (case 2) or which
7955                  *      might occur (case 3).
7956                  *
7957                  *      The first case is when we had
7958                  *      deferred shadow object creation
7959                  *      via the entry->needs_copy mechanism.
7960                  *      This mechanism only works when
7961                  *      only one entry points to the source
7962                  *      object, and we are about to create
7963                  *      a second entry pointing to the
7964                  *      same object. The problem is that
7965                  *      there is no way of mapping from
7966                  *      an object to the entries pointing
7967                  *      to it. (Deferred shadow creation
7968                  *      works with one entry because it occurs
7969                  *      at fault time, and we walk from the
7970                  *      entry to the object when handling
7971                  *      the fault.)
7972                  *
7973                  *      The second case is when the object
7974                  *      to be shared has already been copied
7975                  *      with a symmetric copy, but we point
7976                  *      directly to the object without
7977                  *      needs_copy set in our entry. (This
7978                  *      can happen because different ranges
7979                  *      of an object can be pointed to by
7980                  *      different entries. In particular,
7981                  *      a single entry pointing to an object
7982                  *      can be split by a call to vm_inherit,
7983                  *      which, combined with task_create, can
7984                  *      result in the different entries
7985                  *      having different needs_copy values.)
7986                  *      The shadowed flag in the object allows
7987                  *      us to detect this case. The problem
7988                  *      with this case is that if this object
7989                  *      has or will have shadows, then we
7990                  *      must not perform an asymmetric copy
7991                  *      of this object, since such a copy
7992                  *      allows the object to be changed, which
7993                  *      will break the previous symmetrical
7994                  *      copies (which rely upon the object
7995                  *      not changing). In a sense, the shadowed
7996                  *      flag says "don't change this object".
7997                  *      We fix this by creating a shadow
7998                  *      object for this object, and sharing
7999                  *      that. This works because we are free
8000                  *      to change the shadow object (and thus
8001                  *      to use an asymmetric copy strategy);
8002                  *      this is also semantically correct,
8003                  *      since this object is temporary, and
8004                  *      therefore a copy of the object is
8005                  *      as good as the object itself. (This
8006                  *      is not true for permanent objects,
8007                  *      since the pager needs to see changes,
8008                  *      which won't happen if the changes
8009                  *      are made to a copy.)
8010                  *
8011                  *      The third case is when the object
8012                  *      to be shared has parts sticking
8013                  *      outside of the entry we're working
8014                  *      with, and thus may in the future
8015                  *      be subject to a symmetrical copy.
8016                  *      (This is a preemptive version of
8017                  *      case 2.)
8018                  */
8019                 vm_object_shadow(&old_entry->object.vm_object,
8020                                  &old_entry->offset,
8021                                  (vm_map_size_t) (old_entry->vme_end -
8022                                                   old_entry->vme_start));
8023
8024                 /*
8025                  *      If we're making a shadow for other than
8026                  *      copy on write reasons (cases 2 and 3), then
8027                  *      we have to remove write permission.
8028                  */
8029
8030                 if (!old_entry->needs_copy &&
8031                     (old_entry->protection & VM_PROT_WRITE)) {
8032                         vm_prot_t prot;
8033
8034                         prot = old_entry->protection & ~VM_PROT_WRITE;
8035
8036                         if (override_nx(old_map, old_entry->alias) && prot)
8037                                 prot |= VM_PROT_EXECUTE;
8038
8039                         if (old_map->mapped) {
8040                                 vm_object_pmap_protect(
8041                                         old_entry->object.vm_object,
8042                                         old_entry->offset,
8043                                         (old_entry->vme_end -
8044                                          old_entry->vme_start),
8045                                         PMAP_NULL,
8046                                         old_entry->vme_start,
8047                                         prot);
8048                         } else {
8049                                 pmap_protect(old_map->pmap,
8050                                              old_entry->vme_start,
8051                                              old_entry->vme_end,
8052                                              prot);
8053                         }
8054                 }
8055
8056                 old_entry->needs_copy = FALSE;
8057                 object = old_entry->object.vm_object;   /* re-read: vm_object_shadow() was handed this field's address */
8058         }
8059
8060
8061         /*
8062          *      If object was using a symmetric copy strategy,
8063          *      change its copy strategy to the asymmetric
8064          *      MEMORY_OBJECT_COPY_DELAY (the only strategy this
8065          *      code sets; no copy_call/norma path is present).
8066          *      Bump the reference count for the new entry; for
8067          *      a submap entry, reference the submap instead.
8068          */
8069
8070         if(old_entry->is_sub_map) {
8071                 vm_map_lock(old_entry->object.sub_map);
8072                 vm_map_reference(old_entry->object.sub_map);
8073                 vm_map_unlock(old_entry->object.sub_map);
8074         } else {
8075                 vm_object_lock(object);
8076                 vm_object_reference_locked(object);
8077                 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
8078                         object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
8079                 }
8080                 vm_object_unlock(object);
8081         }
8082
8083         /*
8084          *      Clone the entry, using object ref from above.
8085          *      Mark both entries as shared.
8086          */
8087
8088         new_entry = vm_map_entry_create(new_map);
8089         vm_map_entry_copy(new_entry, old_entry);
8090         old_entry->is_shared = TRUE;
8091         new_entry->is_shared = TRUE;
8092
8093         /*
8094          *      Insert the entry into the new map -- we
8095          *      know we're inserting at the end of the new
8096          *      map.
8097          */
8098
8099         vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
8100
8101         /*
8102          *      Update the physical map
8103          */
8104
8105         if (old_entry->is_sub_map) {
8106                 /* Bill Angell pmap support goes here (nothing is done for submaps today) */
8107         } else {
8108                 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
8109                           old_entry->vme_end - old_entry->vme_start,
8110                           old_entry->vme_start);
8111         }
8112 }
8113
8114 static boolean_t
8115 vm_map_fork_copy(       /* copy one entry's range into new_map via copyin; TRUE if copied, FALSE if skipped */
8116         vm_map_t        old_map,        /* locked by caller; unlocked and re-locked in here */
8117         vm_map_entry_t  *old_entry_p,   /* IN/OUT: entry to copy; set to where the traversal resumes */
8118         vm_map_t        new_map)        /* copy is inserted after its current last entry */
8119 {
8120         vm_map_entry_t old_entry = *old_entry_p;
8121         vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
8122         vm_map_offset_t start = old_entry->vme_start;
8123         vm_map_copy_t copy;
8124         vm_map_entry_t last = vm_map_last_entry(new_map);
8125
8126         vm_map_unlock(old_map); /* re-taken on both return paths below */
8127         /*
8128          *      Use maxprot version of copyin because we
8129          *      care about whether this memory can ever
8130          *      be accessed, not just whether it's accessible
8131          *      right now.
8132          */
8133         if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, &copy)
8134             != KERN_SUCCESS) {
8135                 /*
8136                  *      The map might have changed while it
8137                  *      was unlocked, check it again.  Skip
8138                  *      any blank space or permanently
8139                  *      unreadable region.
8140                  */
8141                 vm_map_lock(old_map);
8142                 if (!vm_map_lookup_entry(old_map, start, &last) ||
8143                     (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
8144                         last = last->vme_next;
8145                 }
8146                 *old_entry_p = last;
8147
8148                 /*
8149                  * XXX  For some error returns, want to
8150                  * XXX  skip to the next element.  Note
8151                  *      that INVALID_ADDRESS and
8152                  *      PROTECTION_FAILURE are handled above.
8153                  */
8154
8155                 return FALSE;
8156         }
8157
8158         /*
8159          *      Insert the copy into the new map
8160          */
8161
8162         vm_map_copy_insert(new_map, last, copy);
8163
8164         /*
8165          *      Pick up the traversal at the end of
8166          *      the copied region.
8167          */
8168
8169         vm_map_lock(old_map);
8170         start += entry_size;
8171         if (! vm_map_lookup_entry(old_map, start, &last)) {
8172                 last = last->vme_next;
8173         } else {
8174                 if (last->vme_start == start) {
8175                         /*
8176                          * No need to clip here and we don't
8177                          * want to cause any unnecessary
8178                          * unnesting...
8179                          */
8180                 } else {
8181                         vm_map_clip_start(old_map, last, start);
8182                 }
8183         }
8184         *old_entry_p = last;
8185
8186         return TRUE;
8187 }
8188
8189 /*
8190  *      vm_map_fork:
8191  *
8192  *      Create and return a new map based on the old map.  Each entry
8193  *      is handled per its inheritance value: NONE is skipped, SHARE goes
8194  *      through vm_map_fork_share(), COPY tries vm_object_copy_quickly()
8195  *      and falls back on vm_map_fork_copy().
8196  *      The source map must not be locked; it is locked in here.
8197  */
8198 vm_map_t
8199 vm_map_fork(
8200         vm_map_t        old_map)
8201 {
8202         pmap_t          new_pmap;
8203         vm_map_t        new_map;
8204         vm_map_entry_t  old_entry;
8205         vm_map_size_t   new_size = 0, entry_size;       /* new_size: running total stored in new_map->size */
8206         vm_map_entry_t  new_entry;
8207         boolean_t       src_needs_copy;
8208         boolean_t       new_entry_needs_copy;
8209
8210         new_pmap = pmap_create((vm_map_size_t) 0,
8211 #if defined(__i386__) || defined(__x86_64__)
8212                                old_map->pmap->pm_task_map != TASK_MAP_32BIT
8213 #else
8214                                0
8215 #endif
8216                                );
8217 #if defined(__i386__)
8218         if (old_map->pmap->pm_task_map == TASK_MAP_64BIT_SHARED)
8219                 pmap_set_4GB_pagezero(new_pmap);
8220 #endif
8221
8222         vm_map_reference_swap(old_map); /* presumably balanced by vm_map_deallocate() at the end -- confirm */
8223         vm_map_lock(old_map);
8224
8225         new_map = vm_map_create(new_pmap,
8226                                 old_map->min_offset,
8227                                 old_map->max_offset,
8228                                 old_map->hdr.entries_pageable);
8229         for (
8230                 old_entry = vm_map_first_entry(old_map);
8231                 old_entry != vm_map_to_entry(old_map);
8232                 ) {
8233
8234                 entry_size = old_entry->vme_end - old_entry->vme_start;
8235
8236                 switch (old_entry->inheritance) {       /* any other value matches no case: entry is skipped */
8237                 case VM_INHERIT_NONE:
8238                         break;
8239
8240                 case VM_INHERIT_SHARE:
8241                         vm_map_fork_share(old_map, old_entry, new_map);
8242                         new_size += entry_size;
8243                         break;
8244
8245                 case VM_INHERIT_COPY:
8246
8247                         /*
8248                          *      Inline the copy_quickly case;
8249                          *      upon failure, fall back on call
8250                          *      to vm_map_fork_copy.
8251                          */
8252
8253                         if(old_entry->is_sub_map)
8254                                 break;  /* submap entries are not copied into the new map */
8255                         if ((old_entry->wired_count != 0) ||
8256                             ((old_entry->object.vm_object != NULL) &&
8257                              (old_entry->object.vm_object->true_share))) {
8258                                 goto slow_vm_map_fork_copy;     /* wired or true_share: skip the quick path */
8259                         }
8260
8261                         new_entry = vm_map_entry_create(new_map);
8262                         vm_map_entry_copy(new_entry, old_entry);
8263                         /* clear address space specifics */
8264                         new_entry->use_pmap = FALSE;
8265
8266                         if (! vm_object_copy_quickly(
8267                                     &new_entry->object.vm_object,
8268                                     old_entry->offset,
8269                                     (old_entry->vme_end -
8270                                      old_entry->vme_start),
8271                                     &src_needs_copy,
8272                                     &new_entry_needs_copy)) {
8273                                 vm_map_entry_dispose(new_map, new_entry);
8274                                 goto slow_vm_map_fork_copy;
8275                         }
8276
8277                         /*
8278                          *      Handle copy-on-write obligations
8279                          */
8280
8281                         if (src_needs_copy && !old_entry->needs_copy) {
8282                                 vm_prot_t prot;
8283
8284                                 prot = old_entry->protection & ~VM_PROT_WRITE;
8285
8286                                 if (override_nx(old_map, old_entry->alias) && prot)
8287                                         prot |= VM_PROT_EXECUTE;
8288
8289                                 vm_object_pmap_protect(
8290                                         old_entry->object.vm_object,
8291                                         old_entry->offset,
8292                                         (old_entry->vme_end -
8293                                          old_entry->vme_start),
8294                                         ((old_entry->is_shared
8295                                           || old_map->mapped)
8296                                          ? PMAP_NULL :
8297                                          old_map->pmap),
8298                                         old_entry->vme_start,
8299                                         prot);
8300
8301                                 old_entry->needs_copy = TRUE;
8302                         }
8303                         new_entry->needs_copy = new_entry_needs_copy;
8304
8305                         /*
8306                          *      Insert the entry at the end
8307                          *      of the map.
8308                          */
8309
8310                         vm_map_store_entry_link(new_map, vm_map_last_entry(new_map),
8311                                           new_entry);
8312                         new_size += entry_size;
8313                         break;
8314
8315                 slow_vm_map_fork_copy:
8316                         if (vm_map_fork_copy(old_map, &old_entry, new_map)) {
8317                                 new_size += entry_size;
8318                         }
8319                         continue;       /* vm_map_fork_copy() already advanced old_entry; skip the vme_next step */
8320                 }
8321                 old_entry = old_entry->vme_next;
8322         }
8323
8324         new_map->size = new_size;
8325         vm_map_unlock(old_map);
8326         vm_map_deallocate(old_map);
8327
8328         return(new_map);
8329 }
8330
8331 /*
8332  * vm_map_exec:
8333  *
8334  *      Set up the "new_map" with the proper execution environment according
8335  *      to the type of executable (platform, 64bit, chroot environment).
8336  *      Map the comm page and shared region; their results are ignored.
8337  */
8338 kern_return_t
8339 vm_map_exec(
8340         vm_map_t        new_map,
8341         task_t          task,
8342         void            *fsroot,
8343         cpu_type_t      cpu)
8344 {
8345         SHARED_REGION_TRACE_DEBUG(
8346                 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
8347                  current_task(), new_map, task, fsroot, cpu));
8348         (void) vm_commpage_enter(new_map, task);        /* return value deliberately discarded */
8349         (void) vm_shared_region_enter(new_map, task, fsroot, cpu);      /* return value deliberately discarded */
8350         SHARED_REGION_TRACE_DEBUG(
8351                 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
8352                  current_task(), new_map, task, fsroot, cpu));
8353         return KERN_SUCCESS;    /* unconditional: no failure is reported to the caller */
8354 }
8355
8356 /*
8357  *      vm_map_lookup_locked:
8358  *
8359  *      Finds the VM object, offset, and
8360  *      protection for a given virtual address in the
8361  *      specified map, assuming a page fault of the
8362  *      type specified.
8363  *
8364  *      Returns the (object, offset, protection) for
8365  *      this address, whether it is wired down, and whether
8366  *      this map has the only reference to the data in question.
8367  *      In order to later verify this lookup, a "version"
8368  *      is returned.
8369  *
8370  *      The map MUST be locked by the caller and WILL be
8371  *      locked on exit.  In order to guarantee the
8372  *      existence of the returned object, it is returned
8373  *      locked.
8374  *
8375  *      If a lookup is requested with "write protection"
8376  *      specified, the map may be changed to perform virtual
8377  *      copying operations, although the data referenced will
8378  *      remain the same.
8379  */
8380 kern_return_t
8381 vm_map_lookup_locked(
8382         vm_map_t                *var_map,       /* IN/OUT */
8383         vm_map_offset_t         vaddr,
8384         vm_prot_t               fault_type,
8385         int                     object_lock_type,
8386         vm_map_version_t        *out_version,   /* OUT */
8387         vm_object_t             *object,        /* OUT */
8388         vm_object_offset_t      *offset,        /* OUT */
8389         vm_prot_t               *out_prot,      /* OUT */
8390         boolean_t               *wired,         /* OUT */
8391         vm_object_fault_info_t  fault_info,     /* OUT */
8392         vm_map_t                *real_map)
8393 {
8394         vm_map_entry_t                  entry;
8395         register vm_map_t               map = *var_map;
8396         vm_map_t                        old_map = *var_map;
8397         vm_map_t                        cow_sub_map_parent = VM_MAP_NULL;
8398         vm_map_offset_t                 cow_parent_vaddr = 0;
8399         vm_map_offset_t                 old_start = 0;
8400         vm_map_offset_t                 old_end = 0;
8401         register vm_prot_t              prot;
8402         boolean_t                       mask_protections;
8403         vm_prot_t                       original_fault_type;
8404
8405         /*
8406          * VM_PROT_MASK means that the caller wants us to use "fault_type"
8407          * as a mask against the mapping's actual protections, not as an
8408          * absolute value.
8409          */
8410         mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
8411         fault_type &= ~VM_PROT_IS_MASK;
8412         original_fault_type = fault_type;
8413
8414         *real_map = map;
8415
8416 RetryLookup:
8417         fault_type = original_fault_type;
8418
8419         /*
8420          *      If the map has an interesting hint, try it before calling
8421          *      full blown lookup routine.
8422          */
8423         entry = map->hint;
8424
8425         if ((entry == vm_map_to_entry(map)) ||
8426             (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
8427                 vm_map_entry_t  tmp_entry;
8428
8429                 /*
8430                  *      Entry was either not a valid hint, or the vaddr
8431                  *      was not contained in the entry, so do a full lookup.
8432                  */
8433                 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
8434                         if((cow_sub_map_parent) && (cow_sub_map_parent != map))
8435                                 vm_map_unlock(cow_sub_map_parent);
8436                         if((*real_map != map)
8437                            && (*real_map != cow_sub_map_parent))
8438                                 vm_map_unlock(*real_map);
8439                         return KERN_INVALID_ADDRESS;
8440                 }
8441
8442                 entry = tmp_entry;
8443         }
8444         if(map == old_map) {
8445                 old_start = entry->vme_start;
8446                 old_end = entry->vme_end;
8447         }
8448
8449         /*
8450          *      Handle submaps.  Drop lock on upper map, submap is
8451          *      returned locked.
8452          */
8453
8454 submap_recurse:
8455         if (entry->is_sub_map) {
8456                 vm_map_offset_t         local_vaddr;
8457                 vm_map_offset_t         end_delta;
8458                 vm_map_offset_t         start_delta;
8459                 vm_map_entry_t          submap_entry;
8460                 boolean_t               mapped_needs_copy=FALSE;
8461
8462                 local_vaddr = vaddr;
8463
8464                 if ((entry->use_pmap && !(fault_type & VM_PROT_WRITE))) {
8465                         /* if real_map equals map we unlock below */
8466                         if ((*real_map != map) &&
8467                             (*real_map != cow_sub_map_parent))
8468                                 vm_map_unlock(*real_map);
8469                         *real_map = entry->object.sub_map;
8470                 }
8471
8472                 if(entry->needs_copy && (fault_type & VM_PROT_WRITE)) {
8473                         if (!mapped_needs_copy) {
8474                                 if (vm_map_lock_read_to_write(map)) {
8475                                         vm_map_lock_read(map);
8476                                         /* XXX FBDP: entry still valid ? */
8477                                         if(*real_map == entry->object.sub_map)
8478                                                 *real_map = map;
8479                                         goto RetryLookup;
8480                                 }
8481                                 vm_map_lock_read(entry->object.sub_map);
8482                                 cow_sub_map_parent = map;
8483                                 /* reset base to map before cow object */
8484                                 /* this is the map which will accept   */
8485                                 /* the new cow object */
8486                                 old_start = entry->vme_start;
8487                                 old_end = entry->vme_end;
8488                                 cow_parent_vaddr = vaddr;
8489                                 mapped_needs_copy = TRUE;
8490                         } else {
8491                                 vm_map_lock_read(entry->object.sub_map);
8492                                 if((cow_sub_map_parent != map) &&
8493                                    (*real_map != map))
8494                                         vm_map_unlock(map);
8495                         }
8496                 } else {
8497                         vm_map_lock_read(entry->object.sub_map);
8498                         /* leave map locked if it is a target */
8499                         /* cow sub_map above otherwise, just  */
8500                         /* follow the maps down to the object */
8501                         /* here we unlock knowing we are not  */
8502                         /* revisiting the map.  */
8503                         if((*real_map != map) && (map != cow_sub_map_parent))
8504                                 vm_map_unlock_read(map);
8505                 }
8506
8507                 /* XXX FBDP: map has been unlocked, what protects "entry" !? */
8508                 *var_map = map = entry->object.sub_map;
8509
8510                 /* calculate the offset in the submap for vaddr */
8511                 local_vaddr = (local_vaddr - entry->vme_start) + entry->offset;
8512
8513         RetrySubMap:
8514                 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
8515                         if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
8516                                 vm_map_unlock(cow_sub_map_parent);
8517                         }
8518                         if((*real_map != map)
8519                            && (*real_map != cow_sub_map_parent)) {
8520                                 vm_map_unlock(*real_map);
8521                         }
8522                         *real_map = map;
8523                         return KERN_INVALID_ADDRESS;
8524                 }
8525
8526                 /* find the attenuated shadow of the underlying object */
8527                 /* on our target map */
8528
8529                 /* in english the submap object may extend beyond the     */
8530                 /* region mapped by the entry or, may only fill a portion */
8531                 /* of it.  For our purposes, we only care if the object   */
8532                 /* doesn't fill.  In this case the area which will        */
8533                 /* ultimately be clipped in the top map will only need    */
8534                 /* to be as big as the portion of the underlying entry    */
8535                 /* which is mapped */
8536                 start_delta = submap_entry->vme_start > entry->offset ?
8537                         submap_entry->vme_start - entry->offset : 0;
8538
8539                 end_delta =
8540                         (entry->offset + start_delta + (old_end - old_start)) <=
8541                         submap_entry->vme_end ?
8542                         0 : (entry->offset +
8543                              (old_end - old_start))
8544                         - submap_entry->vme_end;
8545
8546                 old_start += start_delta;
8547                 old_end -= end_delta;
8548
8549                 if(submap_entry->is_sub_map) {
8550                         entry = submap_entry;
8551                         vaddr = local_vaddr;
8552                         goto submap_recurse;
8553                 }
8554
8555                 if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) {
8556
8557                         vm_object_t     sub_object, copy_object;
8558                         vm_object_offset_t copy_offset;
8559                         vm_map_offset_t local_start;
8560                         vm_map_offset_t local_end;
8561                         boolean_t               copied_slowly = FALSE;
8562
8563                         if (vm_map_lock_read_to_write(map)) {
8564                                 vm_map_lock_read(map);
8565                                 old_start -= start_delta;
8566                                 old_end += end_delta;
8567                                 goto RetrySubMap;
8568                         }
8569
8570
8571                         sub_object = submap_entry->object.vm_object;
8572                         if (sub_object == VM_OBJECT_NULL) {
8573                                 sub_object =
8574                                         vm_object_allocate(
8575                                                 (vm_map_size_t)
8576                                                 (submap_entry->vme_end -
8577                                                  submap_entry->vme_start));
8578                                 submap_entry->object.vm_object = sub_object;
8579                                 submap_entry->offset = 0;
8580                         }
8581                         local_start =  local_vaddr -
8582                                 (cow_parent_vaddr - old_start);
8583                         local_end = local_vaddr +
8584                                 (old_end - cow_parent_vaddr);
8585                         vm_map_clip_start(map, submap_entry, local_start);
8586                         vm_map_clip_end(map, submap_entry, local_end);
8587                         /* unnesting was done in vm_map_clip_start/end() */
8588                         assert(!submap_entry->use_pmap);
8589
8590                         /* This is the COW case, lets connect */
8591                         /* an entry in our space to the underlying */
8592                         /* object in the submap, bypassing the  */
8593                         /* submap. */
8594
8595
8596                         if(submap_entry->wired_count != 0 ||
8597                            (sub_object->copy_strategy ==
8598                             MEMORY_OBJECT_COPY_NONE)) {
8599                                 vm_object_lock(sub_object);
8600                                 vm_object_copy_slowly(sub_object,
8601                                                       submap_entry->offset,
8602                                                       (submap_entry->vme_end -
8603                                                        submap_entry->vme_start),
8604                                                       FALSE,
8605                                                       &copy_object);
8606                                 copied_slowly = TRUE;
8607                         } else {
8608
8609                                 /* set up shadow object */
8610                                 copy_object = sub_object;
8611                                 vm_object_reference(copy_object);
8612                                 sub_object->shadowed = TRUE;
8613                                 submap_entry->needs_copy = TRUE;
8614
8615                                 prot = submap_entry->protection & ~VM_PROT_WRITE;
8616
8617                                 if (override_nx(map, submap_entry->alias) && prot)
8618                                         prot |= VM_PROT_EXECUTE;
8619
8620                                 vm_object_pmap_protect(
8621                                         sub_object,
8622                                         submap_entry->offset,
8623                                         submap_entry->vme_end -
8624                                         submap_entry->vme_start,
8625                                         (submap_entry->is_shared
8626                                          || map->mapped) ?
8627                                         PMAP_NULL : map->pmap,
8628                                         submap_entry->vme_start,
8629                                         prot);
8630                         }
8631
8632                         /*
8633                          * Adjust the fault offset to the submap entry.
8634                          */
8635                         copy_offset = (local_vaddr -
8636                                        submap_entry->vme_start +
8637                                        submap_entry->offset);
8638
8639                         /* This works differently than the  */
8640                         /* normal submap case. We go back  */
8641                         /* to the parent of the cow map and*/
8642                         /* clip out the target portion of  */
8643                         /* the sub_map, substituting the   */
8644                         /* new copy object,                */
8645
8646                         vm_map_unlock(map);
8647                         local_start = old_start;
8648                         local_end = old_end;
8649                         map = cow_sub_map_parent;
8650                         *var_map = cow_sub_map_parent;
8651                         vaddr = cow_parent_vaddr;
8652                         cow_sub_map_parent = NULL;
8653
8654                         if(!vm_map_lookup_entry(map,
8655                                                 vaddr, &entry)) {
8656                                 vm_object_deallocate(
8657                                         copy_object);
8658                                 vm_map_lock_write_to_read(map);
8659                                 return KERN_INVALID_ADDRESS;
8660                         }
8661
8662                         /* clip out the portion of space */
8663                         /* mapped by the sub map which   */
8664                         /* corresponds to the underlying */
8665                         /* object */
8666
8667                         /*
8668                          * Clip (and unnest) the smallest nested chunk
8669                          * possible around the faulting address...
8670                          */
8671                         local_start = vaddr & ~(pmap_nesting_size_min - 1);
8672                         local_end = local_start + pmap_nesting_size_min;
8673                         /*
8674                          * ... but don't go beyond the "old_start" to "old_end"
8675                          * range, to avoid spanning over another VM region
8676                          * with a possibly different VM object and/or offset.
8677                          */
8678                         if (local_start < old_start) {
8679                                 local_start = old_start;
8680                         }
8681                         if (local_end > old_end) {
8682                                 local_end = old_end;
8683                         }
8684                         /*
8685                          * Adjust copy_offset to the start of the range.
8686                          */
8687                         copy_offset -= (vaddr - local_start);
8688
8689                         vm_map_clip_start(map, entry, local_start);
8690                         vm_map_clip_end(map, entry, local_end);
8691                         /* unnesting was done in vm_map_clip_start/end() */
8692                         assert(!entry->use_pmap);
8693
8694                         /* substitute copy object for */
8695                         /* shared map entry           */
8696                         vm_map_deallocate(entry->object.sub_map);
8697                         entry->is_sub_map = FALSE;
8698                         entry->object.vm_object = copy_object;
8699
8700                         /* propagate the submap entry's protections */
8701                         entry->protection |= submap_entry->protection;
8702                         entry->max_protection |= submap_entry->max_protection;
8703
8704                         if(copied_slowly) {
8705                                 entry->offset = local_start - old_start;
8706                                 entry->needs_copy = FALSE;
8707                                 entry->is_shared = FALSE;
8708                         } else {
8709                                 entry->offset = copy_offset;
8710                                 entry->needs_copy = TRUE;
8711                                 if(entry->inheritance == VM_INHERIT_SHARE)
8712                                         entry->inheritance = VM_INHERIT_COPY;
8713                                 if (map != old_map)
8714                                         entry->is_shared = TRUE;
8715                         }
8716                         if(entry->inheritance == VM_INHERIT_SHARE)
8717                                 entry->inheritance = VM_INHERIT_COPY;
8718
8719                         vm_map_lock_write_to_read(map);
8720                 } else {
8721                         if((cow_sub_map_parent)
8722                            && (cow_sub_map_parent != *real_map)
8723                            && (cow_sub_map_parent != map)) {
8724                                 vm_map_unlock(cow_sub_map_parent);
8725                         }
8726                         entry = submap_entry;
8727                         vaddr = local_vaddr;
8728                 }
8729         }
8730
8731         /*
8732          *      Check whether this task is allowed to have
8733          *      this page.
8734          */
8735
8736         prot = entry->protection;
8737
8738         if (override_nx(map, entry->alias) && prot) {
8739                 /*
8740                  * HACK -- if not a stack, then allow execution
8741                  */
8742                 prot |= VM_PROT_EXECUTE;
8743         }
8744
8745         if (mask_protections) {
8746                 fault_type &= prot;
8747                 if (fault_type == VM_PROT_NONE) {
8748                         goto protection_failure;
8749                 }
8750         }
8751         if ((fault_type & (prot)) != fault_type) {
8752         protection_failure:
8753                 if (*real_map != map) {
8754                         vm_map_unlock(*real_map);
8755                 }
8756                 *real_map = map;
8757
8758                 if ((fault_type & VM_PROT_EXECUTE) && prot)
8759                         log_stack_execution_failure((addr64_t)vaddr, prot);
8760
8761                 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
8762                 return KERN_PROTECTION_FAILURE;
8763         }
8764
8765         /*
8766          *      If this page is not pageable, we have to get
8767          *      it for all possible accesses.
8768          */
8769
8770         *wired = (entry->wired_count != 0);
8771         if (*wired)
8772                 fault_type = prot;
8773
8774         /*
8775          *      If the entry was copy-on-write, we either ...
8776          */
8777
8778         if (entry->needs_copy) {
8779                 /*
8780                  *      If we want to write the page, we may as well
8781                  *      handle that now since we've got the map locked.
8782                  *
8783                  *      If we don't need to write the page, we just
8784                  *      demote the permissions allowed.
8785                  */
8786
8787                 if ((fault_type & VM_PROT_WRITE) || *wired) {
8788                         /*
8789                          *      Make a new object, and place it in the
8790                          *      object chain.  Note that no new references
8791                          *      have appeared -- one just moved from the
8792                          *      map to the new object.
8793                          */
8794
8795                         if (vm_map_lock_read_to_write(map)) {
8796                                 vm_map_lock_read(map);
8797                                 goto RetryLookup;
8798                         }
8799                         vm_object_shadow(&entry->object.vm_object,
8800                                          &entry->offset,
8801                                          (vm_map_size_t) (entry->vme_end -
8802                                                           entry->vme_start));
8803
8804                         entry->object.vm_object->shadowed = TRUE;
8805                         entry->needs_copy = FALSE;
8806                         vm_map_lock_write_to_read(map);
8807                 }
8808                 else {
8809                         /*
8810                          *      We're attempting to read a copy-on-write
8811                          *      page -- don't allow writes.
8812                          */
8813
8814                         prot &= (~VM_PROT_WRITE);
8815                 }
8816         }
8817
8818         /*
8819          *      Create an object if necessary.
8820          */
8821         if (entry->object.vm_object == VM_OBJECT_NULL) {
8822
8823                 if (vm_map_lock_read_to_write(map)) {
8824                         vm_map_lock_read(map);
8825                         goto RetryLookup;
8826                 }
8827
8828                 entry->object.vm_object = vm_object_allocate(
8829                         (vm_map_size_t)(entry->vme_end - entry->vme_start));
8830                 entry->offset = 0;
8831                 vm_map_lock_write_to_read(map);
8832         }
8833
8834         /*
8835          *      Return the object/offset from this entry.  If the entry
8836          *      was copy-on-write or empty, it has been fixed up.  Also
8837          *      return the protection.
8838          */
8839
8840         *offset = (vaddr - entry->vme_start) + entry->offset;
8841         *object = entry->object.vm_object;
8842         *out_prot = prot;
8843
8844         if (fault_info) {
8845                 fault_info->interruptible = THREAD_UNINT; /* for now... */
8846                 /* ... the caller will change "interruptible" if needed */
8847                 fault_info->cluster_size = 0;
8848                 fault_info->user_tag = entry->alias;
8849                 fault_info->behavior = entry->behavior;
8850                 fault_info->lo_offset = entry->offset;
8851                 fault_info->hi_offset = (entry->vme_end - entry->vme_start) + entry->offset;
8852                 fault_info->no_cache  = entry->no_cache;
8853                 fault_info->stealth = FALSE;
8854                 fault_info->io_sync = FALSE;
8855                 fault_info->cs_bypass = (entry->used_for_jit)? TRUE : FALSE;
8856                 fault_info->mark_zf_absent = FALSE;
8857         }
8858
8859         /*
8860          *      Lock the object to prevent it from disappearing
8861          */
8862         if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
8863                 vm_object_lock(*object);
8864         else
8865                 vm_object_lock_shared(*object);
8866
8867         /*
8868          *      Save the version number
8869          */
8870
8871         out_version->main_timestamp = map->timestamp;
8872
8873         return KERN_SUCCESS;
8874 }
8875
8876
8877 /*
8878  *      vm_map_verify:
8879  *
8880  *      Read-locks the map and compares its timestamp with
8881  *      version->main_timestamp.  Equal: returns true, read lock kept
8882  *      for vm_map_verify_done().  Different: unlocks, returns false.
8883  */
8884 boolean_t
8885 vm_map_verify(
8886         register vm_map_t               map,
8887         register vm_map_version_t       *version)       /* REF */
8888 {
8889         boolean_t       same_version;
8890
8891         vm_map_lock_read(map);
8892         same_version = (version->main_timestamp == map->timestamp);
8893         if (same_version)
8894                 return same_version;    /* read lock stays held */
8895
8896         vm_map_unlock_read(map);        /* timestamps differ: drop the lock */
8897         return same_version;
8898 }
8899
8900 /*
8901  *      vm_map_verify_done:
8902  *
8903  *      Releases locks acquired by a vm_map_verify.
8904  *
8905  *      This is now a macro in vm/vm_map.h.  It does a
8906  *      vm_map_unlock_read on the map.
8907  */
8908
8909
8910 /*
8911  *      TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
8912  *      Goes away after regular vm_region_recurse function migrates to
8913  *      64 bits
8914  *      vm_region_recurse: A form of vm_region which follows the
8915  *      submaps in a target map
8916  *
8917  */
8918
8919 kern_return_t
8920 vm_map_region_recurse_64(
8921         vm_map_t                 map,
8922         vm_map_offset_t *address,               /* IN/OUT */
8923         vm_map_size_t           *size,                  /* OUT */
8924         natural_t               *nesting_depth, /* IN/OUT */
8925         vm_region_submap_info_64_t      submap_info,    /* IN/OUT */
8926         mach_msg_type_number_t  *count) /* IN/OUT */
8927 {
8928         vm_region_extended_info_data_t  extended;
8929         vm_map_entry_t                  tmp_entry;
8930         vm_map_offset_t                 user_address;
8931         unsigned int                    user_max_depth;
8932
8933         /*
8934          * "curr_entry" is the VM map entry preceding or including the
8935          * address we're looking for.
8936          * "curr_map" is the map or sub-map containing "curr_entry".
8937          * "curr_address" is the equivalent of the top map's "user_address"
8938          * in the current map.
8939          * "curr_offset" is the cumulated offset of "curr_map" in the
8940          * target task's address space.
8941          * "curr_depth" is the depth of "curr_map" in the chain of
8942          * sub-maps.
8943          *
8944          * "curr_max_below" and "curr_max_above" limit the range (around
8945          * "curr_address") we should take into account in the current (sub)map.
8946          * They limit the range to what's visible through the map entries
8947          * we've traversed from the top map to the current map.
8948
8949          */
8950         vm_map_entry_t                  curr_entry;
8951         vm_map_address_t                curr_address;
8952         vm_map_offset_t                 curr_offset;
8953         vm_map_t                        curr_map;
8954         unsigned int                    curr_depth;
8955         vm_map_offset_t                 curr_max_below, curr_max_above;
8956         vm_map_offset_t                 curr_skip;
8957
8958         /*
8959          * "next_" is the same as "curr_" but for the VM region immediately
8960          * after the address we're looking for.  We need to keep track of this
8961          * too because we want to return info about that region if the
8962          * address we're looking for is not mapped.
8963          */
8964         vm_map_entry_t                  next_entry;
8965         vm_map_offset_t                 next_offset;
8966         vm_map_offset_t                 next_address;
8967         vm_map_t                        next_map;
8968         unsigned int                    next_depth;
8969         vm_map_offset_t                 next_max_below, next_max_above;
8970         vm_map_offset_t                 next_skip;
8971
8972         boolean_t                       look_for_pages;
8973         vm_region_submap_short_info_64_t short_info;
8974
8975         if (map == VM_MAP_NULL) {
8976                 /* no address space to work on */
8977                 return KERN_INVALID_ARGUMENT;
8978         }
8979
8980         if (*count < VM_REGION_SUBMAP_INFO_COUNT_64) {
8981                 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
8982                         /*
8983                          * "info" structure is not big enough and
8984                          * would overflow
8985                          */
8986                         return KERN_INVALID_ARGUMENT;
8987                 } else {
8988                         look_for_pages = FALSE;
8989                         *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
8990                         short_info = (vm_region_submap_short_info_64_t) submap_info;
8991                         submap_info = NULL;
8992                 }
8993         } else {
8994                 look_for_pages = TRUE;
8995                 *count = VM_REGION_SUBMAP_INFO_COUNT_64;
8996                 short_info = NULL;
8997         }
8998
8999
9000         user_address = *address;
9001         user_max_depth = *nesting_depth;
9002
9003         curr_entry = NULL;
9004         curr_map = map;
9005         curr_address = user_address;
9006         curr_offset = 0;
9007         curr_skip = 0;
9008         curr_depth = 0;
9009         curr_max_above = ((vm_map_offset_t) -1) - curr_address;
9010         curr_max_below = curr_address;
9011
9012         next_entry = NULL;
9013         next_map = NULL;
9014         next_address = 0;
9015         next_offset = 0;
9016         next_skip = 0;
9017         next_depth = 0;
9018         next_max_above = (vm_map_offset_t) -1;
9019         next_max_below = (vm_map_offset_t) -1;
9020
9021         if (not_in_kdp) {
9022                 vm_map_lock_read(curr_map);
9023         }
9024
9025         for (;;) {
9026                 if (vm_map_lookup_entry(curr_map,
9027                                         curr_address,
9028                                         &tmp_entry)) {
9029                         /* tmp_entry contains the address we're looking for */
9030                         curr_entry = tmp_entry;
9031                 } else {
9032                         vm_map_offset_t skip;
9033                         /*
9034                          * The address is not mapped.  "tmp_entry" is the
9035                          * map entry preceding the address.  We want the next
9036                          * one, if it exists.
9037                          */
9038                         curr_entry = tmp_entry->vme_next;
9039
9040                         if (curr_entry == vm_map_to_entry(curr_map) ||
9041                             (curr_entry->vme_start >=
9042                              curr_address + curr_max_above)) {
9043                                 /* no next entry at this level: stop looking */
9044                                 if (not_in_kdp) {
9045                                         vm_map_unlock_read(curr_map);
9046                                 }
9047                                 curr_entry = NULL;
9048                                 curr_map = NULL;
9049                                 curr_offset = 0;
9050                                 curr_depth = 0;
9051                                 curr_max_above = 0;
9052                                 curr_max_below = 0;
9053                                 break;
9054                         }
9055
9056                         /* adjust current address and offset */
9057                         skip = curr_entry->vme_start - curr_address;
9058                         curr_address = curr_entry->vme_start;
9059                         curr_skip = skip;
9060                         curr_offset += skip;
9061                         curr_max_above -= skip;
9062                         curr_max_below = 0;
9063                 }
9064
9065                 /*
9066                  * Is the next entry at this level closer to the address (or
9067                  * deeper in the submap chain) than the one we had
9068                  * so far ?
9069                  */
9070                 tmp_entry = curr_entry->vme_next;
9071                 if (tmp_entry == vm_map_to_entry(curr_map)) {
9072                         /* no next entry at this level */
9073                 } else if (tmp_entry->vme_start >=
9074                            curr_address + curr_max_above) {
9075                         /*
9076                          * tmp_entry is beyond the scope of what we mapped of
9077                          * this submap in the upper level: ignore it.
9078                          */
9079                 } else if ((next_entry == NULL) ||
9080                            (tmp_entry->vme_start + curr_offset <=
9081                             next_entry->vme_start + next_offset)) {
9082                         /*
9083                          * We didn't have a "next_entry" or this one is
9084                          * closer to the address we're looking for:
9085                          * use this "tmp_entry" as the new "next_entry".
9086                          */
9087                         if (next_entry != NULL) {
9088                                 /* unlock the last "next_map" */
9089                                 if (next_map != curr_map && not_in_kdp) {
9090                                         vm_map_unlock_read(next_map);
9091                                 }
9092                         }
9093                         next_entry = tmp_entry;
9094                         next_map = curr_map;
9095                         next_depth = curr_depth;
9096                         next_address = next_entry->vme_start;
9097                         next_skip = curr_skip;
9098                         next_offset = curr_offset;
9099                         next_offset += (next_address - curr_address);
9100                         next_max_above = MIN(next_max_above, curr_max_above);
9101                         next_max_above = MIN(next_max_above,
9102                                              next_entry->vme_end - next_address);
9103                         next_max_below = MIN(next_max_below, curr_max_below);
9104                         next_max_below = MIN(next_max_below,
9105                                              next_address - next_entry->vme_start);
9106                 }
9107
9108                 /*
9109                  * "curr_max_{above,below}" allow us to keep track of the
9110                  * portion of the submap that is actually mapped at this level:
9111                  * the rest of that submap is irrelevant to us, since it's not
9112                  * mapped here.
9113                  * The relevant portion of the map starts at
9114                  * "curr_entry->offset" up to the size of "curr_entry".
9115                  */
9116                 curr_max_above = MIN(curr_max_above,
9117                                      curr_entry->vme_end - curr_address);
9118                 curr_max_below = MIN(curr_max_below,
9119                                      curr_address - curr_entry->vme_start);
9120
9121                 if (!curr_entry->is_sub_map ||
9122                     curr_depth >= user_max_depth) {
9123                         /*
9124                          * We hit a leaf map or we reached the maximum depth
9125                          * we could, so stop looking.  Keep the current map
9126                          * locked.
9127                          */
9128                         break;
9129                 }
9130
9131                 /*
9132                  * Get down to the next submap level.
9133                  */
9134
9135                 /*
9136                  * Lock the next level and unlock the current level,
9137                  * unless we need to keep it locked to access the "next_entry"
9138                  * later.
9139                  */
9140                 if (not_in_kdp) {
9141                         vm_map_lock_read(curr_entry->object.sub_map);
9142                 }
9143                 if (curr_map == next_map) {
9144                         /* keep "next_map" locked in case we need it */
9145                 } else {
9146                         /* release this map */
9147                         if (not_in_kdp)
9148                                 vm_map_unlock_read(curr_map);
9149                 }
9150
9151                 /*
9152                  * Adjust the offset.  "curr_entry" maps the submap
9153                  * at relative address "curr_entry->vme_start" in the
9154                  * curr_map but skips the first "curr_entry->offset"
9155                  * bytes of the submap.
9156                  * "curr_offset" always represents the offset of a virtual
9157                  * address in the curr_map relative to the absolute address
9158                  * space (i.e. the top-level VM map).
9159                  */
9160                 curr_offset +=
9161                         (curr_entry->offset - curr_entry->vme_start);
9162                 curr_address = user_address + curr_offset;
9163                 /* switch to the submap */
9164                 curr_map = curr_entry->object.sub_map;
9165                 curr_depth++;
9166                 curr_entry = NULL;
9167         }
9168
9169         if (curr_entry == NULL) {
9170                 /* no VM region contains the address... */
9171                 if (next_entry == NULL) {
9172                         /* ... and no VM region follows it either */
9173                         return KERN_INVALID_ADDRESS;
9174                 }
9175                 /* ... gather info about the next VM region */
9176                 curr_entry = next_entry;
9177                 curr_map = next_map;    /* still locked ... */
9178                 curr_address = next_address;
9179                 curr_skip = next_skip;
9180                 curr_offset = next_offset;
9181                 curr_depth = next_depth;
9182                 curr_max_above = next_max_above;
9183                 curr_max_below = next_max_below;
9184                 if (curr_map == map) {
9185                         user_address = curr_address;
9186                 }
9187         } else {
9188                 /* we won't need "next_entry" after all */
9189                 if (next_entry != NULL) {
9190                         /* release "next_map" */
9191                         if (next_map != curr_map && not_in_kdp) {
9192                                 vm_map_unlock_read(next_map);
9193                         }
9194                 }
9195         }
9196         next_entry = NULL;
9197         next_map = NULL;
9198         next_offset = 0;
9199         next_skip = 0;
9200         next_depth = 0;
9201         next_max_below = -1;
9202         next_max_above = -1;
9203
9204         *nesting_depth = curr_depth;
9205         *size = curr_max_above + curr_max_below;
9206         *address = user_address + curr_skip - curr_max_below;
9207
9208 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
9209 // so probably should be a real 32b ID vs. ptr.
9210 // Current users just check for equality
9211 #define INFO_MAKE_OBJECT_ID(p)  ((uint32_t)(uintptr_t)p)
9212
9213         if (look_for_pages) {
9214                 submap_info->user_tag = curr_entry->alias;
9215                 submap_info->offset = curr_entry->offset;
9216                 submap_info->protection = curr_entry->protection;
9217                 submap_info->inheritance = curr_entry->inheritance;
9218                 submap_info->max_protection = curr_entry->max_protection;
9219                 submap_info->behavior = curr_entry->behavior;
9220                 submap_info->user_wired_count = curr_entry->user_wired_count;
9221                 submap_info->is_submap = curr_entry->is_sub_map;
9222                 submap_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
9223         } else {
9224                 short_info->user_tag = curr_entry->alias;
9225                 short_info->offset = curr_entry->offset;
9226                 short_info->protection = curr_entry->protection;
9227                 short_info->inheritance = curr_entry->inheritance;
9228                 short_info->max_protection = curr_entry->max_protection;
9229                 short_info->behavior = curr_entry->behavior;
9230                 short_info->user_wired_count = curr_entry->user_wired_count;
9231                 short_info->is_submap = curr_entry->is_sub_map;
9232                 short_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
9233         }
9234
9235         extended.pages_resident = 0;
9236         extended.pages_swapped_out = 0;
9237         extended.pages_shared_now_private = 0;
9238         extended.pages_dirtied = 0;
9239         extended.external_pager = 0;
9240         extended.shadow_depth = 0;
9241
9242         if (not_in_kdp) {
9243                 if (!curr_entry->is_sub_map) {
9244                         vm_map_offset_t range_start, range_end;
9245                         range_start = MAX((curr_address - curr_max_below),
9246                                           curr_entry->vme_start);
9247                         range_end = MIN((curr_address + curr_max_above),
9248                                         curr_entry->vme_end);
9249                         vm_map_region_walk(curr_map,
9250                                            range_start,
9251                                            curr_entry,
9252                                            (curr_entry->offset +
9253                                             (range_start -
9254                                              curr_entry->vme_start)),
9255                                            range_end - range_start,
9256                                            &extended,
9257                                            look_for_pages);
9258                         if (extended.external_pager &&
9259                             extended.ref_count == 2 &&
9260                             extended.share_mode == SM_SHARED) {
9261                                 extended.share_mode = SM_PRIVATE;
9262                         }
9263                 } else {
9264                         if (curr_entry->use_pmap) {
9265                                 extended.share_mode = SM_TRUESHARED;
9266                         } else {
9267                                 extended.share_mode = SM_PRIVATE;
9268                         }
9269                         extended.ref_count =
9270                                 curr_entry->object.sub_map->ref_count;
9271                 }
9272         }
9273
9274         if (look_for_pages) {
9275                 submap_info->pages_resident = extended.pages_resident;
9276                 submap_info->pages_swapped_out = extended.pages_swapped_out;
9277                 submap_info->pages_shared_now_private =
9278                         extended.pages_shared_now_private;
9279                 submap_info->pages_dirtied = extended.pages_dirtied;
9280                 submap_info->external_pager = extended.external_pager;
9281                 submap_info->shadow_depth = extended.shadow_depth;
9282                 submap_info->share_mode = extended.share_mode;
9283                 submap_info->ref_count = extended.ref_count;
9284         } else {
9285                 short_info->external_pager = extended.external_pager;
9286                 short_info->shadow_depth = extended.shadow_depth;
9287                 short_info->share_mode = extended.share_mode;
9288                 short_info->ref_count = extended.ref_count;
9289         }
9290
9291         if (not_in_kdp) {
9292                 vm_map_unlock_read(curr_map);
9293         }
9294
9295         return KERN_SUCCESS;
9296 }
9297
9298 /*
9299  *      vm_map_region:
9300  *
9301  *      User call to obtain information about a region in
9302  *      a task's address map. The BASIC_INFO, BASIC_INFO_64,
9303  *      EXTENDED_INFO and TOP_INFO flavors are supported.
9304  *
9305  *      XXX Historically the reserved and behavior fields could not
9306  *          be filled in (pending the vm merge from the IK and
9307  *          vm_reserve); the basic flavors now set both of them.
9308  */
9309
/*
 * vm_map_region:
 *
 * Report information about the map entry found for *address.  If the
 * lookup finds no entry for that address, the entry that follows the
 * lookup result (tmp_entry->vme_next) is reported instead.
 *
 * map          map to query; VM_MAP_NULL yields KERN_INVALID_ARGUMENT.
 * address      IN: address to look up.  OUT: vme_start of the entry
 *              that was reported.
 * size         OUT: vme_end - vme_start of that entry.
 * flavor       selects the structure written through "info"; flavors
 *              not handled by the switch yield KERN_INVALID_ARGUMENT.
 * info         OUT: buffer, cast to the structure matching "flavor".
 * count        IN: caller-supplied count; if it is smaller than the
 *              flavor's *_COUNT constant the call fails with
 *              KERN_INVALID_ARGUMENT.  OUT: set to that constant.  This
 *              happens before the lookup, so it is also updated when
 *              KERN_INVALID_ADDRESS is returned.
 * object_name  OUT, may be NULL: set to IP_NULL on success.
 *
 * Returns KERN_SUCCESS, KERN_INVALID_ARGUMENT, or KERN_INVALID_ADDRESS
 * when the fallback entry is vm_map_to_entry(map) (presumably the end of
 * the entry list).  The map read lock is taken and released inside each
 * case; it is not held on return.
 */
9310 kern_return_t
9311 vm_map_region(
9312         vm_map_t                 map,
9313         vm_map_offset_t *address,               /* IN/OUT */
9314         vm_map_size_t           *size,                  /* OUT */
9315         vm_region_flavor_t       flavor,                /* IN */
9316         vm_region_info_t         info,                  /* OUT */
9317         mach_msg_type_number_t  *count, /* IN/OUT */
9318         mach_port_t             *object_name)           /* OUT */
9319 {
9320         vm_map_entry_t          tmp_entry;
9321         vm_map_entry_t          entry;
9322         vm_map_offset_t         start;
9323
9324         if (map == VM_MAP_NULL)
9325                 return(KERN_INVALID_ARGUMENT);
9326
             /*
              * Every flavor follows the same steps: validate and update
              * *count, read-lock the map, find the entry, fill in "info",
              * report the entry's start and size, unlock, return.
              */
9327         switch (flavor) {
9328
9329         case VM_REGION_BASIC_INFO:
9330                 /* legacy for old 32-bit objects info */
9331         {
9332                 vm_region_basic_info_t  basic;
9333
9334                 if (*count < VM_REGION_BASIC_INFO_COUNT)
9335                         return(KERN_INVALID_ARGUMENT);
9336
9337                 basic = (vm_region_basic_info_t) info;
9338                 *count = VM_REGION_BASIC_INFO_COUNT;
9339
9340                 vm_map_lock_read(map);
9341
9342                 start = *address;
9343                 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
                             /* no entry for "start": fall forward to the next one */
9344                         if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9345                                 vm_map_unlock_read(map);
9346                                 return(KERN_INVALID_ADDRESS);
9347                         }
9348                 } else {
9349                         entry = tmp_entry;
9350                 }
9351
                     /* report the whole entry, not just the part from *address on */
9352                 start = entry->vme_start;
9353
                     /* the offset is truncated to 32 bits in this legacy flavor */
9354                 basic->offset = (uint32_t)entry->offset;
9355                 basic->protection = entry->protection;
9356                 basic->inheritance = entry->inheritance;
9357                 basic->max_protection = entry->max_protection;
9358                 basic->behavior = entry->behavior;
9359                 basic->user_wired_count = entry->user_wired_count;
                     /* the "reserved" field carries the entry's submap flag */
9360                 basic->reserved = entry->is_sub_map;
9361                 *address = start;
9362                 *size = (entry->vme_end - start);
9363
9364                 if (object_name) *object_name = IP_NULL;
                     /* submap entries are always reported as not shared */
9365                 if (entry->is_sub_map) {
9366                         basic->shared = FALSE;
9367                 } else {
9368                         basic->shared = entry->is_shared;
9369                 }
9370
9371                 vm_map_unlock_read(map);
9372                 return(KERN_SUCCESS);
9373         }
9374
9375         case VM_REGION_BASIC_INFO_64:
             /* same as VM_REGION_BASIC_INFO, but the offset is not truncated */
9376         {
9377                 vm_region_basic_info_64_t       basic;
9378
9379                 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
9380                         return(KERN_INVALID_ARGUMENT);
9381
9382                 basic = (vm_region_basic_info_64_t) info;
9383                 *count = VM_REGION_BASIC_INFO_COUNT_64;
9384
9385                 vm_map_lock_read(map);
9386
9387                 start = *address;
9388                 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9389                         if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9390                                 vm_map_unlock_read(map);
9391                                 return(KERN_INVALID_ADDRESS);
9392                         }
9393                 } else {
9394                         entry = tmp_entry;
9395                 }
9396
9397                 start = entry->vme_start;
9398
9399                 basic->offset = entry->offset;
9400                 basic->protection = entry->protection;
9401                 basic->inheritance = entry->inheritance;
9402                 basic->max_protection = entry->max_protection;
9403                 basic->behavior = entry->behavior;
9404                 basic->user_wired_count = entry->user_wired_count;
9405                 basic->reserved = entry->is_sub_map;
9406                 *address = start;
9407                 *size = (entry->vme_end - start);
9408
9409                 if (object_name) *object_name = IP_NULL;
9410                 if (entry->is_sub_map) {
9411                         basic->shared = FALSE;
9412                 } else {
9413                         basic->shared = entry->is_shared;
9414                 }
9415
9416                 vm_map_unlock_read(map);
9417                 return(KERN_SUCCESS);
9418         }
9419         case VM_REGION_EXTENDED_INFO:
9420         {
9421                 vm_region_extended_info_t       extended;
9422
9423                 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
9424                         return(KERN_INVALID_ARGUMENT);
9425
9426                 extended = (vm_region_extended_info_t) info;
9427                 *count = VM_REGION_EXTENDED_INFO_COUNT;
9428
9429                 vm_map_lock_read(map);
9430
9431                 start = *address;
9432                 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9433                         if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9434                                 vm_map_unlock_read(map);
9435                                 return(KERN_INVALID_ADDRESS);
9436                         }
9437                 } else {
9438                         entry = tmp_entry;
9439                 }
9440                 start = entry->vme_start;
9441
9442                 extended->protection = entry->protection;
9443                 extended->user_tag = entry->alias;
                     /* zero the fields that the walk below only accumulates into */
9444                 extended->pages_resident = 0;
9445                 extended->pages_swapped_out = 0;
9446                 extended->pages_shared_now_private = 0;
9447                 extended->pages_dirtied = 0;
9448                 extended->external_pager = 0;
9449                 extended->shadow_depth = 0;
9450
                     /* look_for_pages == TRUE: walk the whole entry page by page */
9451                 vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended, TRUE);
9452
                     /*
                      * A SM_SHARED result with an external pager and exactly two
                      * references is reported as SM_PRIVATE (presumably the
                      * second reference belongs to the pager -- TODO confirm).
                      */
9453                 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
9454                         extended->share_mode = SM_PRIVATE;
9455
9456                 if (object_name)
9457                         *object_name = IP_NULL;
9458                 *address = start;
9459                 *size = (entry->vme_end - start);
9460
9461                 vm_map_unlock_read(map);
9462                 return(KERN_SUCCESS);
9463         }
9464         case VM_REGION_TOP_INFO:
9465         {
9466                 vm_region_top_info_t    top;
9467
9468                 if (*count < VM_REGION_TOP_INFO_COUNT)
9469                         return(KERN_INVALID_ARGUMENT);
9470
9471                 top = (vm_region_top_info_t) info;
9472                 *count = VM_REGION_TOP_INFO_COUNT;
9473
9474                 vm_map_lock_read(map);
9475
9476                 start = *address;
9477                 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9478                         if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9479                                 vm_map_unlock_read(map);
9480                                 return(KERN_INVALID_ADDRESS);
9481                         }
9482                 } else {
9483                         entry = tmp_entry;
9484
9485                 }
9486                 start = entry->vme_start;
9487
                     /* vm_map_region_top_walk does not assign both counters on every path */
9488                 top->private_pages_resident = 0;
9489                 top->shared_pages_resident = 0;
9490
9491                 vm_map_region_top_walk(entry, top);
9492
9493                 if (object_name)
9494                         *object_name = IP_NULL;
9495                 *address = start;
9496                 *size = (entry->vme_end - start);
9497
9498                 vm_map_unlock_read(map);
9499                 return(KERN_SUCCESS);
9500         }
9501         default:
                     /* unknown flavor; nothing has been locked or written yet */
9502                 return(KERN_INVALID_ARGUMENT);
9503         }
9504 }
9505
/*
 * OBJ_RESIDENT_COUNT(obj, entry_size):
 * Page count to report for "obj", capped at "entry_size" (the callers
 * visible in this file pass the entry's size in pages).  When
 * obj->all_reusable is set the count is obj->wired_page_count; otherwise
 * it is resident_page_count minus reusable_page_count.
 * "obj" is expanded several times, so pass a side-effect-free expression.
 */
9506 #define OBJ_RESIDENT_COUNT(obj, entry_size)                             \
9507         MIN((entry_size),                                               \
9508             ((obj)->all_reusable ?                                      \
9509              (obj)->wired_page_count :                                  \
9510              (obj)->resident_page_count - (obj)->reusable_page_count))
9511
/*
 * vm_map_region_top_walk:
 *
 * Fill in the share_mode, ref_count, obj_id and resident-page fields of
 * "top" for the VM object mapped by "entry".
 *
 * entry   map entry to summarize.  No map lock is taken here; the caller
 *         visible in this file (vm_map_region) holds the map read lock.
 * top     OUT.  The *_pages_resident fields are assigned only on some
 *         paths (and accumulated with += while walking shadows), so the
 *         caller is expected to zero them first, as vm_map_region does.
 *
 * Entries with no object, or that are submaps, are reported as SM_EMPTY
 * with ref_count 0 and obj_id 0.
 */
9512 void
9513 vm_map_region_top_walk(
9514         vm_map_entry_t             entry,
9515         vm_region_top_info_t       top)
9516 {
9517
9518         if (entry->object.vm_object == 0 || entry->is_sub_map) {
9519                 top->share_mode = SM_EMPTY;
9520                 top->ref_count = 0;
9521                 top->obj_id = 0;
9522                 return;
9523         }
9524
9525         {
9526                 struct  vm_object *obj, *tmp_obj;
9527                 int             ref_count;
9528                 uint32_t        entry_size;
9529
                     /* size of the entry in pages, narrowed to 32 bits */
9530                 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
9531
9532                 obj = entry->object.vm_object;
9533
9534                 vm_object_lock(obj);
9535
                     /*
                      * Discount one reference while paging is in progress
                      * (presumably that activity holds a reference of its own
                      * -- TODO confirm).
                      */
9536                 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9537                         ref_count--;
9538
9539                 assert(obj->reusable_page_count <= obj->resident_page_count);
9540                 if (obj->shadow) {
                             /*
                              * The object has a shadow: report SM_COW.  Pages in the
                              * top object count as private only if it has a single
                              * reference.
                              */
9541                         if (ref_count == 1)
9542                                 top->private_pages_resident =
9543                                         OBJ_RESIDENT_COUNT(obj, entry_size);
9544                         else
9545                                 top->shared_pages_resident =
9546                                         OBJ_RESIDENT_COUNT(obj, entry_size);
9547                         top->ref_count  = ref_count;
9548                         top->share_mode = SM_COW;
9549
                             /*
                              * Walk down the shadow chain, locking the next object
                              * before unlocking the current one.  Pages in every
                              * shadow object are counted as shared.
                              */
9550                         while ((tmp_obj = obj->shadow)) {
9551                                 vm_object_lock(tmp_obj);
9552                                 vm_object_unlock(obj);
9553                                 obj = tmp_obj;
9554
9555                                 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9556                                         ref_count--;
9557
9558                                 assert(obj->reusable_page_count <= obj->resident_page_count);
9559                                 top->shared_pages_resident +=
9560                                         OBJ_RESIDENT_COUNT(obj, entry_size);
                                     /* add this object's references, less one */
9561                                 top->ref_count += ref_count - 1;
9562                         }
9563                 } else {
9564                         if (entry->superpage_size) {
                                     /* superpage entry: every page is reported private */
9565                                 top->share_mode = SM_LARGE_PAGE;
9566                                 top->shared_pages_resident = 0;
9567                                 top->private_pages_resident = entry_size;
9568                         } else if (entry->needs_copy) {
9569                                 top->share_mode = SM_COW;
9570                                 top->shared_pages_resident =
9571                                         OBJ_RESIDENT_COUNT(obj, entry_size);
9572                         } else {
                                     /*
                                      * Private if there is a single reference, or two
                                      * references on an object that is neither
                                      * pager_trusted nor internal.
                                      */
9573                                 if (ref_count == 1 ||
9574                                     (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
9575                                         top->share_mode = SM_PRIVATE;
9576                                         top->private_pages_resident =
9577                                                 OBJ_RESIDENT_COUNT(obj,
9578                                                                    entry_size);
9579                                 } else {
9580                                         top->share_mode = SM_SHARED;
9581                                         top->shared_pages_resident =
9582                                                 OBJ_RESIDENT_COUNT(obj,
9583                                                                   entry_size);
9584                                 }
9585                         }
9586                         top->ref_count = ref_count;
9587                 }
                     /*
                      * "obj" is the last object reached above, so after a shadow
                      * walk this identifies the bottom of the chain, not the
                      * entry's own object.
                      */
9588                 /* XXX K64: obj_id will be truncated */
9589                 top->obj_id = (unsigned int) (uintptr_t)obj;
9590
9591                 vm_object_unlock(obj);
9592         }
9593 }
9594
/*
 * vm_map_region_walk:
 *
 * Compute share_mode, ref_count, shadow_depth and external_pager (and,
 * when look_for_pages is set, per-page counters) for the part of
 * "entry" described by offset/range, storing them in "extended".
 *
 * map             map whose entry list is scanned when the result is
 *                 SM_SHARED.  It is not locked here.
 * va              address matching "offset"; only forwarded to
 *                 vm_map_region_look_for_page.
 * entry           map entry being examined.
 * offset          starting offset within the entry's object.
 * range           number of bytes to cover.
 * extended        OUT.  On the normal path the page counters and
 *                 external_pager are only incremented/set, never
 *                 cleared, so callers zero them first (as the callers
 *                 in this file do).
 * look_for_pages  TRUE: inspect every page in the range.  FALSE: only
 *                 walk the shadow chain for depth and pager type.
 */
9595 void
9596 vm_map_region_walk(
9597         vm_map_t                        map,
9598         vm_map_offset_t                 va,
9599         vm_map_entry_t                  entry,
9600         vm_object_offset_t              offset,
9601         vm_object_size_t                range,
9602         vm_region_extended_info_t       extended,
9603         boolean_t                       look_for_pages)
9604 {
9605         register struct vm_object *obj, *tmp_obj;
9606         register vm_map_offset_t       last_offset;
9607         register int               i;
9608         register int               ref_count;
9609         struct vm_object        *shadow_object;
9610         int                     shadow_depth;
9611
             /*
              * Nothing to examine: no object, a submap, or a physically
              * contiguous object that is not a superpage mapping.
              */
9612         if ((entry->object.vm_object == 0) ||
9613             (entry->is_sub_map) ||
9614             (entry->object.vm_object->phys_contiguous &&
9615              !entry->superpage_size)) {
9616                 extended->share_mode = SM_EMPTY;
9617                 extended->ref_count = 0;
9618                 return;
9619         }
9620
             /* superpage mapping: fixed answer, every page counted resident */
9621         if (entry->superpage_size) {
9622                 extended->shadow_depth = 0;
9623                 extended->share_mode = SM_LARGE_PAGE;
9624                 extended->ref_count = 1;
9625                 extended->external_pager = 0;
9626                 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
9627                 extended->shadow_depth = 0;
9628                 return;
9629         }
9630
9631         {
9632                 obj = entry->object.vm_object;
9633
9634                 vm_object_lock(obj);
9635
                     /* discount one reference while paging is in progress */
9636                 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9637                         ref_count--;
9638
9639                 if (look_for_pages) {
                             /*
                              * One lookup per page.  The callee also raises
                              * extended->shadow_depth and sets external_pager.
                              */
9640                         for (last_offset = offset + range;
9641                              offset < last_offset;
9642                              offset += PAGE_SIZE_64, va += PAGE_SIZE)
9643                                 vm_map_region_look_for_page(map, va, obj,
9644                                                             offset, ref_count,
9645                                                             0, extended);
9646                 } else {
                             /*
                              * Cheap path: only measure the depth of the shadow
                              * chain and note whether any object in it is neither
                              * pager_trusted nor internal.
                              */
9647                         shadow_object = obj->shadow;
9648                         shadow_depth = 0;
9649
9650                         if ( !(obj->pager_trusted) && !(obj->internal))
9651                                 extended->external_pager = 1;
9652
9653                         if (shadow_object != VM_OBJECT_NULL) {
9654                                 vm_object_lock(shadow_object);
                                     /*
                                      * "obj" stays locked; the shadow objects are
                                      * locked hand over hand (next one locked before
                                      * the current one is released).
                                      */
9655                                 for (;
9656                                      shadow_object != VM_OBJECT_NULL;
9657                                      shadow_depth++) {
9658                                         vm_object_t     next_shadow;
9659
9660                                         if ( !(shadow_object->pager_trusted) &&
9661                                              !(shadow_object->internal))
9662                                                 extended->external_pager = 1;
9663
9664                                         next_shadow = shadow_object->shadow;
9665                                         if (next_shadow) {
9666                                                 vm_object_lock(next_shadow);
9667                                         }
9668                                         vm_object_unlock(shadow_object);
9669                                         shadow_object = next_shadow;
9670                                 }
9671                         }
9672                         extended->shadow_depth = shadow_depth;
9673                 }
9674
                     /*
                      * Any shadow, or a pending copy, means copy-on-write.
                      * Otherwise classify by reference count and true_share.
                      */
9675                 if (extended->shadow_depth || entry->needs_copy)
9676                         extended->share_mode = SM_COW;
9677                 else {
9678                         if (ref_count == 1)
9679                                 extended->share_mode = SM_PRIVATE;
9680                         else {
9681                                 if (obj->true_share)
9682                                         extended->share_mode = SM_TRUESHARED;
9683                                 else
9684                                         extended->share_mode = SM_SHARED;
9685                         }
9686                 }
                     /*
                      * Start from the top object's count less the shadow depth,
                      * then add the count of each shadow object below.
                      */
9687                 extended->ref_count = ref_count - extended->shadow_depth;
9688
                     /* hand the object lock down the chain, at most shadow_depth levels */
9689                 for (i = 0; i < extended->shadow_depth; i++) {
9690                         if ((tmp_obj = obj->shadow) == 0)
9691                                 break;
9692                         vm_object_lock(tmp_obj);
9693                         vm_object_unlock(obj);
9694
9695                         if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
9696                                 ref_count--;
9697
9698                         extended->ref_count += ref_count;
9699                         obj = tmp_obj;
9700                 }
                     /* release whichever object the loop ended on */
9701                 vm_object_unlock(obj);
9702
9703                 if (extended->share_mode == SM_SHARED) {
                             /*
                              * Find out how many of the object's references come
                              * from this map by scanning every entry.
                              * NOTE(review): obj->ref_count and paging_in_progress
                              * are read below without the object lock, which was
                              * dropped above -- confirm a stale snapshot is
                              * acceptable here.
                              */
9704                         register vm_map_entry_t      cur;
9705                         register vm_map_entry_t      last;
9706                         int      my_refs;
9707
9708                         obj = entry->object.vm_object;
9709                         last = vm_map_to_entry(map);
9710                         my_refs = 0;
9711
9712                         if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9713                                 ref_count--;
9714                         for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
9715                                 my_refs += vm_map_region_count_obj_refs(cur, obj);
9716
                             /* every reference is ours: private but aliased within this map */
9717                         if (my_refs == ref_count)
9718                                 extended->share_mode = SM_PRIVATE_ALIASED;
9719                         else if (my_refs > 1)
9720                                 extended->share_mode = SM_SHARED_ALIASED;
9721                 }
9722         }
9723 }
9724
9725
9726 /* object is locked on entry and locked on return */
9727
9728
/*
 * vm_map_region_look_for_page:
 *
 * Look for the page at "offset" in "object" and, if it is not there,
 * in each successive shadow object, updating the counters in
 * "extended" for the first object in which the page is found.
 *
 * map, va     unused.
 * object      object to start from; locked by the caller and still
 *             locked on return.
 * offset      offset of the page within "object".
 * max_refcnt  reference count of "object" as computed by the caller;
 *             raised to the largest count seen while descending.
 * depth       shadow depth of "object" (the caller in this file
 *             passes 0).
 * extended    OUT: pages_resident, pages_dirtied,
 *             pages_shared_now_private, pages_swapped_out (MACH_PAGEMAP
 *             builds only), external_pager and shadow_depth are updated.
 *
 * A shadow object is locked before the object above it is unlocked.
 * Every object except the caller's is unlocked before returning.
 */
9729 static void
9730 vm_map_region_look_for_page(
9731         __unused vm_map_t               map,
9732         __unused vm_map_offset_t        va,
9733         vm_object_t                     object,
9734         vm_object_offset_t              offset,
9735         int                             max_refcnt,
9736         int                             depth,
9737         vm_region_extended_info_t       extended)
9738 {
9739         register vm_page_t      p;
9740         register vm_object_t    shadow;
9741         register int            ref_count;
9742         vm_object_t             caller_object;
9743 #if     MACH_PAGEMAP
9744         kern_return_t           kr;
9745 #endif
9746         shadow = object->shadow;
             /* remembered so that the caller's lock is never dropped for good */
9747         caller_object = object;
9748
9749
9750         while (TRUE) {
9751
9752                 if ( !(object->pager_trusted) && !(object->internal))
9753                         extended->external_pager = 1;
9754
9755                 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
                             /*
                              * Resident page found.  It is counted as "shared now
                              * private" when this object has a shadow and no object
                              * on the way down had more than one reference.
                              */
9756                         if (shadow && (max_refcnt == 1))
9757                                 extended->pages_shared_now_private++;
9758
9759                         if (!p->fictitious &&
9760                             (p->dirty || pmap_is_modified(p->phys_page)))
9761                                 extended->pages_dirtied++;
9762
9763                         extended->pages_resident++;
9764
9765                         if(object != caller_object)
9766                                 vm_object_unlock(object);
9767
9768                         return;
9769                 }
9770 #if     MACH_PAGEMAP
9771                 if (object->existence_map) {
                             /* the existence map says the pager holds this page */
9772                         if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) {
9773
9774                                 extended->pages_swapped_out++;
9775
9776                                 if(object != caller_object)
9777                                         vm_object_unlock(object);
9778
9779                                 return;
9780                         }
9781                 } else if (object->internal &&
9782                            object->alive &&
9783                            !object->terminating &&
9784                            object->pager_ready) {
9785
                             /*
                              * No existence map: ask the pager directly with a
                              * zero-length request.  The object lock is dropped
                              * around the call, bracketed by paging_begin/end.
                              */
9786                         memory_object_t pager;
9787
9788                         vm_object_paging_begin(object);
9789                         pager = object->pager;
9790                         vm_object_unlock(object);
9791
9792                         kr = memory_object_data_request(
9793                                 pager,
9794                                 offset + object->paging_offset,
9795                                 0, /* just poke the pager */
9796                                 VM_PROT_READ,
9797                                 NULL);
9798
9799                         vm_object_lock(object);
9800                         vm_object_paging_end(object);
9801
9802                         if (kr == KERN_SUCCESS) {
9803                                 /* the pager has that page */
9804                                 extended->pages_swapped_out++;
9805                                 if (object != caller_object)
9806                                         vm_object_unlock(object);
9807                                 return;
9808                         }
9809                 }
9810 #endif /* MACH_PAGEMAP */
9811
9812                 if (shadow) {
                             /* not found here: descend, locking the shadow first */
9813                         vm_object_lock(shadow);
9814
9815                         if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
9816                                 ref_count--;
9817
9818                         if (++depth > extended->shadow_depth)
9819                                 extended->shadow_depth = depth;
9820
9821                         if (ref_count > max_refcnt)
9822                                 max_refcnt = ref_count;
9823
                             /*
                              * Translate the offset into the shadow's space while
                              * "object" is still locked, so that vo_shadow_offset
                              * is never read from an object we no longer hold.
                              */
9824                         offset = offset + object->vo_shadow_offset;
9825
9826                         if(object != caller_object)
9827                                 vm_object_unlock(object);
9828                         object = shadow;
9829                         shadow = object->shadow;
9830                         continue;
9831                 }
                     /* bottom of the chain reached without finding the page */
9832                 if(object != caller_object)
9833                         vm_object_unlock(object);
9834                 break;
9835         }
9836 }
9837
/*
 * vm_map_region_count_obj_refs:
 *
 * Return how many times "object" appears in the shadow chain that
 * starts at the entry's VM object (the entry's own object included).
 * Entries with no object, and submap entries, count as 0.
 *
 * Objects are locked hand over hand while walking: the shadow is
 * locked before the current object is unlocked, and no object lock is
 * held on return.
 */
9838 static int
9839 vm_map_region_count_obj_refs(
9840         vm_map_entry_t    entry,
9841         vm_object_t       object)
9842 {
9843         register int ref_count;
9844         register vm_object_t chk_obj;
9845         register vm_object_t tmp_obj;
9846
9847         if (entry->object.vm_object == 0)
9848                 return(0);
9849
9850         if (entry->is_sub_map)
9851                 return(0);
9852         else {
9853                 ref_count = 0;
9854
9855                 chk_obj = entry->object.vm_object;
9856                 vm_object_lock(chk_obj);
9857
                     /* chk_obj is locked at the top of every iteration */
9858                 while (chk_obj) {
9859                         if (chk_obj == object)
9860                                 ref_count++;
9861                         tmp_obj = chk_obj->shadow;
                             /* take the next lock before giving up the current one */
9862                         if (tmp_obj)
9863                                 vm_object_lock(tmp_obj);
9864                         vm_object_unlock(chk_obj);
9865
9866                         chk_obj = tmp_obj;
9867                 }
9868         }
9869         return(ref_count);
9870 }
9871
9872
9873 /*
9874  *      Routine:        vm_map_simplify
9875  *
9876  *      Description:
9877  *              Attempt to simplify the map representation in
9878  *              the vicinity of the given starting address.
9879  *      Note:
9880  *              This routine is intended primarily to keep the
9881  *              kernel maps more compact -- they generally don't
9882  *              benefit from the "expand a map entry" technology
9883  *              at allocation time because the adjacent entry
9884  *              is often wired down.
9885  */
9886 void
9887 vm_map_simplify_entry(
9888         vm_map_t        map,
9889         vm_map_entry_t  this_entry)
9890 {
9891         vm_map_entry_t  prev_entry;
9892
9893         counter(c_vm_map_simplify_entry_called++);
9894
9895         prev_entry = this_entry->vme_prev;
9896
9897         if ((this_entry != vm_map_to_entry(map)) &&
9898             (prev_entry != vm_map_to_entry(map)) &&
9899
9900             (prev_entry->vme_end == this_entry->vme_start) &&
9901
9902             (prev_entry->is_sub_map == this_entry->is_sub_map) &&
9903
9904             (prev_entry->object.vm_object == this_entry->object.vm_object) &&
9905             ((prev_entry->offset + (prev_entry->vme_end -
9906                                     prev_entry->vme_start))
9907              == this_entry->offset) &&
9908
9909             (prev_entry->inheritance == this_entry->inheritance) &&
9910             (prev_entry->protection == this_entry->protection) &&
9911             (prev_entry->max_protection == this_entry->max_protection) &&
9912             (prev_entry->behavior == this_entry->behavior) &&
9913             (prev_entry->alias == this_entry->alias) &&
9914             (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
9915             (prev_entry->no_cache == this_entry->no_cache) &&
9916             (prev_entry->wired_count == this_entry->wired_count) &&
9917             (prev_entry->user_wired_count == this_entry->user_wired_count) &&
9918
9919             (prev_entry->needs_copy == this_entry->needs_copy) &&
9920             (prev_entry->permanent == this_entry->permanent) &&
9921
9922             (prev_entry->use_pmap == FALSE) &&
9923             (this_entry->use_pmap == FALSE) &&
9924             (prev_entry->in_transition == FALSE) &&
9925             (this_entry->in_transition == FALSE) &&
9926             (prev_entry->needs_wakeup == FALSE) &&
9927             (this_entry->needs_wakeup == FALSE) &&
9928             (prev_entry->is_shared == FALSE) &&
9929             (this_entry->is_shared == FALSE)
9930                 ) {
9931                 _vm_map_store_entry_unlink(&map->hdr, prev_entry);
9932                 this_entry->vme_start = prev_entry->vme_start;
9933                 this_entry->offset = prev_entry->offset;
9934                 if (prev_entry->is_sub_map) {
9935                         vm_map_deallocate(prev_entry->object.sub_map);
9936                 } else {
9937                         vm_object_deallocate(prev_entry->object.vm_object);
9938                 }
9939                 vm_map_entry_dispose(map, prev_entry);
9940                 SAVE_HINT_MAP_WRITE(map, this_entry);
9941                 counter(c_vm_map_simplified++);
9942         }
9943 }
9944
9945 void
9946 vm_map_simplify(
9947         vm_map_t        map,
9948         vm_map_offset_t start)
9949 {
9950         vm_map_entry_t  this_entry;
9951
9952         vm_map_lock(map);
9953         if (vm_map_lookup_entry(map, start, &this_entry)) {
9954                 vm_map_simplify_entry(map, this_entry);
9955                 vm_map_simplify_entry(map, this_entry->vme_next);
9956         }
9957         counter(c_vm_map_simplify_called++);
9958         vm_map_unlock(map);
9959 }
9960
9961 static void
9962 vm_map_simplify_range(
9963         vm_map_t        map,
9964         vm_map_offset_t start,
9965         vm_map_offset_t end)
9966 {
9967         vm_map_entry_t  entry;
9968
9969         /*
9970          * The map should be locked (for "write") by the caller.
9971          */
9972
9973         if (start >= end) {
9974                 /* invalid address range */
9975                 return;
9976         }
9977
9978         start = vm_map_trunc_page(start);
9979         end = vm_map_round_page(end);
9980
9981         if (!vm_map_lookup_entry(map, start, &entry)) {
9982                 /* "start" is not mapped and "entry" ends before "start" */
9983                 if (entry == vm_map_to_entry(map)) {
9984                         /* start with first entry in the map */
9985                         entry = vm_map_first_entry(map);
9986                 } else {
9987                         /* start with next entry */
9988                         entry = entry->vme_next;
9989                 }
9990         }
9991
9992         while (entry != vm_map_to_entry(map) &&
9993                entry->vme_start <= end) {
9994                 /* try and coalesce "entry" with its previous entry */
9995                 vm_map_simplify_entry(map, entry);
9996                 entry = entry->vme_next;
9997         }
9998 }
9999
10000
10001 /*
10002  *      Routine:        vm_map_machine_attribute
10003  *      Purpose:
10004  *              Provide machine-specific attributes to mappings,
10005  *              such as cachability etc. for machines that provide
10006  *              them.  NUMA architectures and machines with big/strange
10007  *              caches will use this.
10008  *      Note:
10009  *              Responsibilities for locking and checking are handled here,
10010  *              everything else in the pmap module. If any non-volatile
10011  *              information must be kept, the pmap module should handle
10012  *              it itself. [This assumes that attributes do not
10013  *              need to be inherited, which seems ok to me]
10014  */
10015 kern_return_t
10016 vm_map_machine_attribute(
10017         vm_map_t                        map,
10018         vm_map_offset_t         start,
10019         vm_map_offset_t         end,
10020         vm_machine_attribute_t  attribute,
10021         vm_machine_attribute_val_t* value)              /* IN/OUT */
10022 {
10023         kern_return_t   ret;
10024         vm_map_size_t sync_size;
10025         vm_map_entry_t entry;
10026
10027         if (start < vm_map_min(map) || end > vm_map_max(map))
10028                 return KERN_INVALID_ADDRESS;
10029
10030         /* Figure how much memory we need to flush (in page increments) */
10031         sync_size = end - start;
10032
10033         vm_map_lock(map);
10034
10035         if (attribute != MATTR_CACHE) {
10036                 /* If we don't have to find physical addresses, we */
10037                 /* don't have to do an explicit traversal here.    */
10038                 ret = pmap_attribute(map->pmap, start, end-start,
10039                                      attribute, value);
10040                 vm_map_unlock(map);
10041                 return ret;
10042         }
10043
10044         ret = KERN_SUCCESS;                                                                             /* Assume it all worked */
10045
10046         while(sync_size) {
10047                 if (vm_map_lookup_entry(map, start, &entry)) {
10048                         vm_map_size_t   sub_size;
10049                         if((entry->vme_end - start) > sync_size) {
10050                                 sub_size = sync_size;
10051                                 sync_size = 0;
10052                         } else {
10053                                 sub_size = entry->vme_end - start;
10054                                 sync_size -= sub_size;
10055                         }
10056                         if(entry->is_sub_map) {
10057                                 vm_map_offset_t sub_start;
10058                                 vm_map_offset_t sub_end;
10059
10060                                 sub_start = (start - entry->vme_start)
10061                                         + entry->offset;
10062                                 sub_end = sub_start + sub_size;
10063                                 vm_map_machine_attribute(
10064                                         entry->object.sub_map,
10065                                         sub_start,
10066                                         sub_end,
10067                                         attribute, value);
10068                         } else {
10069                                 if(entry->object.vm_object) {
10070                                         vm_page_t               m;
10071                                         vm_object_t             object;
10072                                         vm_object_t             base_object;
10073                                         vm_object_t             last_object;
10074                                         vm_object_offset_t      offset;
10075                                         vm_object_offset_t      base_offset;
10076                                         vm_map_size_t           range;
10077                                         range = sub_size;
10078                                         offset = (start - entry->vme_start)
10079                                                 + entry->offset;
10080                                         base_offset = offset;
10081                                         object = entry->object.vm_object;
10082                                         base_object = object;
10083                                         last_object = NULL;
10084
10085                                         vm_object_lock(object);
10086
10087                                         while (range) {
10088                                                 m = vm_page_lookup(
10089                                                         object, offset);
10090
10091                                                 if (m && !m->fictitious) {
10092                                                         ret =
10093                                                                 pmap_attribute_cache_sync(
10094                                                                         m->phys_page,
10095                                                                         PAGE_SIZE,
10096                                                                         attribute, value);
10097
10098                                                 } else if (object->shadow) {
10099                                                         offset = offset + object->vo_shadow_offset;
10100                                                         last_object = object;
10101                                                         object = object->shadow;
10102                                                         vm_object_lock(last_object->shadow);
10103                                                         vm_object_unlock(last_object);
10104                                                         continue;
10105                                                 }
10106                                                 range -= PAGE_SIZE;
10107
10108                                                 if (base_object != object) {
10109                                                         vm_object_unlock(object);
10110                                                         vm_object_lock(base_object);
10111                                                         object = base_object;
10112                                                 }
10113                                                 /* Bump to the next page */
10114                                                 base_offset += PAGE_SIZE;
10115                                                 offset = base_offset;
10116                                         }
10117                                         vm_object_unlock(object);
10118                                 }
10119                         }
10120                         start += sub_size;
10121                 } else {
10122                         vm_map_unlock(map);
10123                         return KERN_FAILURE;
10124                 }
10125
10126         }
10127
10128         vm_map_unlock(map);
10129
10130         return ret;
10131 }
10132
10133 /*
10134  *      vm_map_behavior_set:
10135  *
10136  *      Sets the paging reference behavior of the specified address
10137  *      range in the target map.  Paging reference behavior affects
10138  *      how pagein operations resulting from faults on the map will be
10139  *      clustered.
10140  */
10141 kern_return_t
10142 vm_map_behavior_set(
10143         vm_map_t        map,
10144         vm_map_offset_t start,
10145         vm_map_offset_t end,
10146         vm_behavior_t   new_behavior)
10147 {
10148         register vm_map_entry_t entry;
10149         vm_map_entry_t  temp_entry;
10150
10151         XPR(XPR_VM_MAP,
10152             "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
10153             map, start, end, new_behavior, 0);
10154
10155         if (start > end ||
10156             start < vm_map_min(map) ||
10157             end > vm_map_max(map)) {
10158                 return KERN_NO_SPACE;
10159         }
10160
10161         switch (new_behavior) {
10162
10163         /*
10164          * This first block of behaviors all set a persistent state on the specified
10165          * memory range.  All we have to do here is to record the desired behavior
10166          * in the vm_map_entry_t's.
10167          */
10168
10169         case VM_BEHAVIOR_DEFAULT:
10170         case VM_BEHAVIOR_RANDOM:
10171         case VM_BEHAVIOR_SEQUENTIAL:
10172         case VM_BEHAVIOR_RSEQNTL:
10173         case VM_BEHAVIOR_ZERO_WIRED_PAGES:
10174                 vm_map_lock(map);
10175
10176                 /*
10177                  *      The entire address range must be valid for the map.
10178                  *      Note that vm_map_range_check() does a
10179                  *      vm_map_lookup_entry() internally and returns the
10180                  *      entry containing the start of the address range if
10181                  *      the entire range is valid.
10182                  */
10183                 if (vm_map_range_check(map, start, end, &temp_entry)) {
10184                         entry = temp_entry;
10185                         vm_map_clip_start(map, entry, start);
10186                 }
10187                 else {
10188                         vm_map_unlock(map);
10189                         return(KERN_INVALID_ADDRESS);
10190                 }
10191
10192                 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
10193                         vm_map_clip_end(map, entry, end);
10194                         assert(!entry->use_pmap);
10195
10196                         if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
10197                                 entry->zero_wired_pages = TRUE;
10198                         } else {
10199                                 entry->behavior = new_behavior;
10200                         }
10201                         entry = entry->vme_next;
10202                 }
10203
10204                 vm_map_unlock(map);
10205                 break;
10206
10207         /*
10208          * The rest of these are different from the above in that they cause
10209          * an immediate action to take place as opposed to setting a behavior that
10210          * affects future actions.
10211          */
10212
10213         case VM_BEHAVIOR_WILLNEED:
10214                 return vm_map_willneed(map, start, end);
10215
10216         case VM_BEHAVIOR_DONTNEED:
10217                 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
10218
10219         case VM_BEHAVIOR_FREE:
10220                 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
10221
10222         case VM_BEHAVIOR_REUSABLE:
10223                 return vm_map_reusable_pages(map, start, end);
10224
10225         case VM_BEHAVIOR_REUSE:
10226                 return vm_map_reuse_pages(map, start, end);
10227
10228         case VM_BEHAVIOR_CAN_REUSE:
10229                 return vm_map_can_reuse(map, start, end);
10230
10231         default:
10232                 return(KERN_INVALID_ARGUMENT);
10233         }
10234
10235         return(KERN_SUCCESS);
10236 }
10237
10238
10239 /*
10240  * Internals for madvise(MADV_WILLNEED) system call.
10241  *
10242  * The present implementation is to do a read-ahead if the mapping corresponds
10243  * to a mapped regular file.  If it's an anonymous mapping, then we do nothing
10244  * and basically ignore the "advice" (which we are always free to do).
10245  */
10246
10247
10248 static kern_return_t
10249 vm_map_willneed(
10250         vm_map_t        map,
10251         vm_map_offset_t start,
10252         vm_map_offset_t end
10253 )
10254 {
10255         vm_map_entry_t                  entry;
10256         vm_object_t                     object;
10257         memory_object_t                 pager;
10258         struct vm_object_fault_info     fault_info;
10259         kern_return_t                   kr;
10260         vm_object_size_t                len;
10261         vm_object_offset_t              offset;
10262
10263         /*
10264          * Fill in static values in fault_info.  Several fields get ignored by the code
10265          * we call, but we'll fill them in anyway since uninitialized fields are bad
10266          * when it comes to future backwards compatibility.
10267          */
10268
10269         fault_info.interruptible = THREAD_UNINT;                /* ignored value */
10270         fault_info.behavior      = VM_BEHAVIOR_SEQUENTIAL;
10271         fault_info.no_cache      = FALSE;                       /* ignored value */
10272         fault_info.stealth       = TRUE;
10273         fault_info.io_sync = FALSE;
10274         fault_info.cs_bypass = FALSE;
10275         fault_info.mark_zf_absent = FALSE;
10276
10277         /*
10278          * The MADV_WILLNEED operation doesn't require any changes to the
10279          * vm_map_entry_t's, so the read lock is sufficient.
10280          */
10281
10282         vm_map_lock_read(map);
10283
10284         /*
10285          * The madvise semantics require that the address range be fully
10286          * allocated with no holes.  Otherwise, we're required to return
10287          * an error.
10288          */
10289
10290         if (! vm_map_range_check(map, start, end, &entry)) {
10291                 vm_map_unlock_read(map);
10292                 return KERN_INVALID_ADDRESS;
10293         }
10294
10295         /*
10296          * Examine each vm_map_entry_t in the range.
10297          */
10298         for (; entry != vm_map_to_entry(map) && start < end; ) {
10299
10300                 /*
10301                  * The first time through, the start address could be anywhere
10302                  * within the vm_map_entry we found.  So adjust the offset to
10303                  * correspond.  After that, the offset will always be zero to
10304                  * correspond to the beginning of the current vm_map_entry.
10305                  */
10306                 offset = (start - entry->vme_start) + entry->offset;
10307
10308                 /*
10309                  * Set the length so we don't go beyond the end of the
10310                  * map_entry or beyond the end of the range we were given.
10311                  * This range could span also multiple map entries all of which
10312                  * map different files, so make sure we only do the right amount
10313                  * of I/O for each object.  Note that it's possible for there
10314                  * to be multiple map entries all referring to the same object
10315                  * but with different page permissions, but it's not worth
10316                  * trying to optimize that case.
10317                  */
10318                 len = MIN(entry->vme_end - start, end - start);
10319
10320                 if ((vm_size_t) len != len) {
10321                         /* 32-bit overflow */
10322                         len = (vm_size_t) (0 - PAGE_SIZE);
10323                 }
10324                 fault_info.cluster_size = (vm_size_t) len;
10325                 fault_info.lo_offset    = offset;
10326                 fault_info.hi_offset    = offset + len;
10327                 fault_info.user_tag     = entry->alias;
10328
10329                 /*
10330                  * If there's no read permission to this mapping, then just
10331                  * skip it.
10332                  */
10333                 if ((entry->protection & VM_PROT_READ) == 0) {
10334                         entry = entry->vme_next;
10335                         start = entry->vme_start;
10336                         continue;
10337                 }
10338
10339                 /*
10340                  * Find the file object backing this map entry.  If there is
10341                  * none, then we simply ignore the "will need" advice for this
10342                  * entry and go on to the next one.
10343                  */
10344                 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
10345                         entry = entry->vme_next;
10346                         start = entry->vme_start;
10347                         continue;
10348                 }
10349
10350                 /*
10351                  * The data_request() could take a long time, so let's
10352                  * release the map lock to avoid blocking other threads.
10353                  */
10354                 vm_map_unlock_read(map);
10355
10356                 vm_object_paging_begin(object);
10357                 pager = object->pager;
10358                 vm_object_unlock(object);
10359
10360                 /*
10361                  * Get the data from the object asynchronously.
10362                  *
10363                  * Note that memory_object_data_request() places limits on the
10364                  * amount of I/O it will do.  Regardless of the len we
10365                  * specified, it won't do more than MAX_UPL_TRANSFER and it
10366                  * silently truncates the len to that size.  This isn't
10367                  * necessarily bad since madvise shouldn't really be used to
10368                  * page in unlimited amounts of data.  Other Unix variants
10369                  * limit the willneed case as well.  If this turns out to be an
10370                  * issue for developers, then we can always adjust the policy
10371                  * here and still be backwards compatible since this is all
10372                  * just "advice".
10373                  */
10374                 kr = memory_object_data_request(
10375                         pager,
10376                         offset + object->paging_offset,
10377                         0,      /* ignored */
10378                         VM_PROT_READ,
10379                         (memory_object_fault_info_t)&fault_info);
10380
10381                 vm_object_lock(object);
10382                 vm_object_paging_end(object);
10383                 vm_object_unlock(object);
10384
10385                 /*
10386                  * If we couldn't do the I/O for some reason, just give up on
10387                  * the madvise.  We still return success to the user since
10388                  * madvise isn't supposed to fail when the advice can't be
10389                  * taken.
10390                  */
10391                 if (kr != KERN_SUCCESS) {
10392                         return KERN_SUCCESS;
10393                 }
10394
10395                 start += len;
10396                 if (start >= end) {
10397                         /* done */
10398                         return KERN_SUCCESS;
10399                 }
10400
10401                 /* look up next entry */
10402                 vm_map_lock_read(map);
10403                 if (! vm_map_lookup_entry(map, start, &entry)) {
10404                         /*
10405                          * There's a new hole in the address range.
10406                          */
10407                         vm_map_unlock_read(map);
10408                         return KERN_INVALID_ADDRESS;
10409                 }
10410         }
10411
10412         vm_map_unlock_read(map);
10413         return KERN_SUCCESS;
10414 }
10415
10416 static boolean_t
10417 vm_map_entry_is_reusable(
10418         vm_map_entry_t entry)
10419 {
10420         vm_object_t object;
10421
10422         if (entry->is_shared ||
10423             entry->is_sub_map ||
10424             entry->in_transition ||
10425             entry->protection != VM_PROT_DEFAULT ||
10426             entry->max_protection != VM_PROT_ALL ||
10427             entry->inheritance != VM_INHERIT_DEFAULT ||
10428             entry->no_cache ||
10429             entry->permanent ||
10430             entry->superpage_size != 0 ||
10431             entry->zero_wired_pages ||
10432             entry->wired_count != 0 ||
10433             entry->user_wired_count != 0) {
10434                 return FALSE;
10435         }
10436
10437         object = entry->object.vm_object;
10438         if (object == VM_OBJECT_NULL) {
10439                 return TRUE;
10440         }
10441         if (object->ref_count == 1 &&
10442             object->wired_page_count == 0 &&
10443             object->copy == VM_OBJECT_NULL &&
10444             object->shadow == VM_OBJECT_NULL &&
10445             object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
10446             object->internal &&
10447             !object->true_share &&
10448             object->wimg_bits == VM_WIMG_USE_DEFAULT &&
10449             !object->code_signed) {
10450                 return TRUE;
10451         }
10452         return FALSE;
10453
10454
10455 }
10456
10457 static kern_return_t
10458 vm_map_reuse_pages(
10459         vm_map_t        map,
10460         vm_map_offset_t start,
10461         vm_map_offset_t end)
10462 {
10463         vm_map_entry_t                  entry;
10464         vm_object_t                     object;
10465         vm_object_offset_t              start_offset, end_offset;
10466
10467         /*
10468          * The MADV_REUSE operation doesn't require any changes to the
10469          * vm_map_entry_t's, so the read lock is sufficient.
10470          */
10471
10472         vm_map_lock_read(map);
10473
10474         /*
10475          * The madvise semantics require that the address range be fully
10476          * allocated with no holes.  Otherwise, we're required to return
10477          * an error.
10478          */
10479
10480         if (!vm_map_range_check(map, start, end, &entry)) {
10481                 vm_map_unlock_read(map);
10482                 vm_page_stats_reusable.reuse_pages_failure++;
10483                 return KERN_INVALID_ADDRESS;
10484         }
10485
10486         /*
10487          * Examine each vm_map_entry_t in the range.
10488          */
10489         for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10490              entry = entry->vme_next) {
10491                 /*
10492                  * Sanity check on the VM map entry.
10493                  */
10494                 if (! vm_map_entry_is_reusable(entry)) {
10495                         vm_map_unlock_read(map);
10496                         vm_page_stats_reusable.reuse_pages_failure++;
10497                         return KERN_INVALID_ADDRESS;
10498                 }
10499
10500                 /*
10501                  * The first time through, the start address could be anywhere
10502                  * within the vm_map_entry we found.  So adjust the offset to
10503                  * correspond.
10504                  */
10505                 if (entry->vme_start < start) {
10506                         start_offset = start - entry->vme_start;
10507                 } else {
10508                         start_offset = 0;
10509                 }
10510                 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
10511                 start_offset += entry->offset;
10512                 end_offset += entry->offset;
10513
10514                 object = entry->object.vm_object;
10515                 if (object != VM_OBJECT_NULL) {
10516                         vm_object_lock(object);
10517                         vm_object_reuse_pages(object, start_offset, end_offset,
10518                                               TRUE);
10519                         vm_object_unlock(object);
10520                 }
10521
10522                 if (entry->alias == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
10523                         /*
10524                          * XXX
10525                          * We do not hold the VM map exclusively here.
10526                          * The "alias" field is not that critical, so it's
10527                          * safe to update it here, as long as it is the only
10528                          * one that can be modified while holding the VM map
10529                          * "shared".
10530                          */
10531                         entry->alias = VM_MEMORY_MALLOC_LARGE_REUSED;
10532                 }
10533         }
10534
10535         vm_map_unlock_read(map);
10536         vm_page_stats_reusable.reuse_pages_success++;
10537         return KERN_SUCCESS;
10538 }
10539
10540
10541 static kern_return_t
10542 vm_map_reusable_pages(
10543         vm_map_t        map,
10544         vm_map_offset_t start,
10545         vm_map_offset_t end)
10546 {
10547         vm_map_entry_t                  entry;
10548         vm_object_t                     object;
10549         vm_object_offset_t              start_offset, end_offset;
10550
10551         /*
10552          * The MADV_REUSABLE operation doesn't require any changes to the
10553          * vm_map_entry_t's, so the read lock is sufficient.
10554          */
10555
10556         vm_map_lock_read(map);
10557
10558         /*
10559          * The madvise semantics require that the address range be fully
10560          * allocated with no holes.  Otherwise, we're required to return
10561          * an error.
10562          */
10563
10564         if (!vm_map_range_check(map, start, end, &entry)) {
10565                 vm_map_unlock_read(map);
10566                 vm_page_stats_reusable.reusable_pages_failure++;
10567                 return KERN_INVALID_ADDRESS;
10568         }
10569
10570         /*
10571          * Examine each vm_map_entry_t in the range.
10572          */
10573         for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10574              entry = entry->vme_next) {
10575                 int kill_pages = 0;
10576
10577                 /*
10578                  * Sanity check on the VM map entry.
10579                  */
10580                 if (! vm_map_entry_is_reusable(entry)) {
10581                         vm_map_unlock_read(map);
10582                         vm_page_stats_reusable.reusable_pages_failure++;
10583                         return KERN_INVALID_ADDRESS;
10584                 }
10585
10586                 /*
10587                  * The first time through, the start address could be anywhere
10588                  * within the vm_map_entry we found.  So adjust the offset to
10589                  * correspond.
10590                  */
10591                 if (entry->vme_start < start) {
10592                         start_offset = start - entry->vme_start;
10593                 } else {
10594                         start_offset = 0;
10595                 }
10596                 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
10597                 start_offset += entry->offset;
10598                 end_offset += entry->offset;
10599
10600                 object = entry->object.vm_object;
10601                 if (object == VM_OBJECT_NULL)
10602                         continue;
10603
10604
10605                 vm_object_lock(object);
10606                 if (object->ref_count == 1 && !object->shadow)
10607                         kill_pages = 1;
10608                 else
10609                         kill_pages = -1;
10610                 if (kill_pages != -1) {
10611                         vm_object_deactivate_pages(object,
10612                                                    start_offset,
10613                                                    end_offset - start_offset,
10614                                                    kill_pages,
10615                                                    TRUE /*reusable_pages*/);
10616                 } else {
10617                         vm_page_stats_reusable.reusable_pages_shared++;
10618                 }
10619                 vm_object_unlock(object);
10620
10621                 if (entry->alias == VM_MEMORY_MALLOC_LARGE ||
10622                     entry->alias == VM_MEMORY_MALLOC_LARGE_REUSED) {
10623                         /*
10624                          * XXX
10625                          * We do not hold the VM map exclusively here.
10626                          * The "alias" field is not that critical, so it's
10627                          * safe to update it here, as long as it is the only
10628                          * one that can be modified while holding the VM map
10629                          * "shared".
10630                          */
10631                         entry->alias = VM_MEMORY_MALLOC_LARGE_REUSABLE;
10632                 }
10633         }
10634
10635         vm_map_unlock_read(map);
10636         vm_page_stats_reusable.reusable_pages_success++;
10637         return KERN_SUCCESS;
10638 }
10639
10640 /*
       *      Routine:        vm_map_can_reuse
       *
       *      Description:    Verifies, under the map's read lock, that the
       *                      range [start, end) passes vm_map_range_check()
       *                      and that every map entry starting below "end"
       *                      satisfies vm_map_entry_is_reusable().  This
       *                      routine itself writes nothing to the map or to
       *                      its entries; it only bumps the counters in
       *                      vm_page_stats_reusable.
       *
       *      Returns:        KERN_SUCCESS when every entry qualifies
       *                      (can_reuse_success is incremented);
       *                      KERN_INVALID_ADDRESS when the range check fails
       *                      or an entry is rejected (can_reuse_failure is
       *                      incremented).  The read lock is released on
       *                      every return path.
       */
10641 static kern_return_t
10642 vm_map_can_reuse(
10643         vm_map_t        map,
10644         vm_map_offset_t start,
10645         vm_map_offset_t end)
10646 {
10647         vm_map_entry_t                  entry;
10648
10649         /*
10650          * This check makes no changes to the
10651          * vm_map_entry_t's, so the read lock is sufficient.
10652          */
10653
10654         vm_map_lock_read(map);
10655
10656         /*
10657          * The madvise semantics require that the address range be fully
10658          * allocated with no holes.  Otherwise, we're required to return
10659          * an error.
              *
              * On success vm_map_range_check() hands back, through "entry",
              * the entry the walk below starts from.
10660          */
10661
10662         if (!vm_map_range_check(map, start, end, &entry)) {
10663                 vm_map_unlock_read(map);
10664                 vm_page_stats_reusable.can_reuse_failure++;
10665                 return KERN_INVALID_ADDRESS;
10666         }
10667
10668         /*
10669          * Examine each vm_map_entry_t in the range.
              * The walk stops at the map's list head or at the first entry
              * that starts at or beyond "end".
10670          */
10671         for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10672              entry = entry->vme_next) {
10673                 /*
10674                  * Sanity check on the VM map entry.
                      * A single rejected entry fails the whole request.
10675                  */
10676                 if (! vm_map_entry_is_reusable(entry)) {
10677                         vm_map_unlock_read(map);
10678                         vm_page_stats_reusable.can_reuse_failure++;
10679                         return KERN_INVALID_ADDRESS;
10680                 }
10681         }
10682
10683         vm_map_unlock_read(map);
10684         vm_page_stats_reusable.can_reuse_success++;
10685         return KERN_SUCCESS;
10686 }
10687
10688
10689
10690 #include <mach_kdb.h>
10691 #if     MACH_KDB
10692 #include <ddb/db_output.h>
10693 #include <vm/vm_print.h>
10694
10695 #define printf  db_printf
10696
10697 /*
10698  * Forward declarations for internal functions.
10699  */
10700 extern void vm_map_links_print(
10701         struct vm_map_links     *links);
10702
10703 extern void vm_map_header_print(
10704         struct vm_map_header    *header);
10705
10706 extern void vm_map_entry_print(
10707         vm_map_entry_t          entry);
10708
10709 extern void vm_follow_entry(
10710         vm_map_entry_t          entry);
10711
10712 extern void vm_follow_map(
10713         vm_map_t                map);
10714
10715 /*
10716  *      vm_map_links_print:     [ debug ]
       *
       *      Prints, on one iprintf() line, the prev and next pointers and
       *      the start and end addresses stored in a struct vm_map_links.
       *      start and end are cast to unsigned long long to match the
       *      %016llX conversions.
       *
       *      NOTE(review): prev and next are pointers but are printed with
       *      %08X; confirm that the debugger's printf copes with
       *      pointer-sized arguments for that conversion on 64-bit kernels.
10717  */
10718 void
10719 vm_map_links_print(
10720         struct vm_map_links     *links)
10721 {
10722         iprintf("prev = %08X  next = %08X  start = %016llX  end = %016llX\n",
10723                 links->prev,
10724                 links->next,
10725                 (unsigned long long)links->start,
10726                 (unsigned long long)links->end);
10727 }
10728
10729 /*
10730  *      vm_map_header_print:    [ debug ]
       *
       *      Prints a struct vm_map_header: first its embedded links (via
       *      vm_map_links_print), then the entry count and the
       *      entries_pageable flag.  The flag name is prefixed with "!"
       *      when the flag is clear.
10731  */
10732 void
10733 vm_map_header_print(
10734         struct vm_map_header    *header)
10735 {
10736         vm_map_links_print(&header->links);
10737         iprintf("nentries = %08X, %sentries_pageable\n",
10738                 header->nentries,
10739                 (header->entries_pageable ? "" : "!"));
10740 }
10741
10742 /*
10743  *      vm_follow_entry:        [ debug ]
       *
       *      Prints the address of a map entry, then hands the entry's
       *      object.vm_object to vm_follow_object() and prints the value
       *      it returns as "Total objects".  The output between the first
       *      line and the end is produced with db_indent raised by 2.
       *
       *      NOTE(review): is_sub_map is not tested before object.vm_object
       *      is used; confirm this is never reached with a submap entry.
10744  */
10745 void
10746 vm_follow_entry(
10747         vm_map_entry_t entry)
10748 {
10749         int shadows;    /* value returned by vm_follow_object() */
10750
10751         iprintf("map entry %08X\n", entry);
10752
10753         db_indent += 2;
10754
10755         shadows = vm_follow_object(entry->object.vm_object);
10756         iprintf("Total objects : %d\n",shadows);
10757
10758         db_indent -= 2;
10759 }
10760
10761 /*
10762  *      vm_map_entry_print:     [ debug ]
       *
       *      Dumps one map entry: its address and list neighbours, its
       *      links, the address range with current/maximum protection and
       *      inheritance, the behavior and wiring counts, the
       *      in_transition/needs_wakeup flags, and finally either the
       *      submap pointer or the VM object pointer with the entry's
       *      offset.  Boolean fields are printed by name, prefixed with
       *      "!" when clear.  Everything after the first line is printed
       *      with db_indent raised by 2.
10763  */
10764 void
10765 vm_map_entry_print(
10766         register vm_map_entry_t entry)
10767 {
              /*
               * Name tables indexed by the low two bits of the entry's
               * inheritance and behavior fields (see the "& 0x3" masks
               * below), so any value maps to one of four strings.
               */
10768         static const char *inheritance_name[4] =
10769                 { "share", "copy", "none", "?"};
10770         static const char *behavior_name[4] =
10771                 { "dflt", "rand", "seqtl", "rseqntl" };
10772
10773         iprintf("map entry %08X - prev = %08X  next = %08X\n", entry, entry->vme_prev, entry->vme_next);
10774
10775         db_indent += 2;
10776
10777         vm_map_links_print(&entry->links);
10778
10779         iprintf("start = %016llX  end = %016llX - prot=%x/%x/%s\n",
10780                 (unsigned long long)entry->vme_start,
10781                 (unsigned long long)entry->vme_end,
10782                 entry->protection,
10783                 entry->max_protection,
10784                 inheritance_name[(entry->inheritance & 0x3)]);
10785
10786         iprintf("behavior = %s, wired_count = %d, user_wired_count = %d\n",
10787                 behavior_name[(entry->behavior & 0x3)],
10788                 entry->wired_count,
10789                 entry->user_wired_count);
10790         iprintf("%sin_transition, %sneeds_wakeup\n",
10791                 (entry->in_transition ? "" : "!"),
10792                 (entry->needs_wakeup ? "" : "!"));
10793
              /*
               * is_shared and needs_copy are only reported for entries
               * that are not submaps.
               */
10794         if (entry->is_sub_map) {
10795                 iprintf("submap = %08X - offset = %016llX\n",
10796                         entry->object.sub_map,
10797                         (unsigned long long)entry->offset);
10798         } else {
10799                 iprintf("object = %08X  offset = %016llX - ",
10800                         entry->object.vm_object,
10801                         (unsigned long long)entry->offset);
                      /* plain printf: continues the line started above */
10802                 printf("%sis_shared, %sneeds_copy\n",
10803                        (entry->is_shared ? "" : "!"),
10804                        (entry->needs_copy ? "" : "!"));
10805         }
10806
10807         db_indent -= 2;
10808 }
10809
10810 /*
10811  *      vm_follow_map:  [ debug ]
       *
       *      Prints the map's address and then calls vm_follow_entry() on
       *      every entry of the map, from vm_map_first_entry(map) until the
       *      list wraps back to vm_map_to_entry(map).  A NULL entry pointer
       *      also ends the walk.  No map lock is taken in this routine.
10812  */
10813 void
10814 vm_follow_map(
10815         vm_map_t map)
10816 {
10817         register vm_map_entry_t entry;
10818
10819         iprintf("task map %08X\n", map);
10820
10821         db_indent += 2;
10822
10823         for (entry = vm_map_first_entry(map);
10824              entry && entry != vm_map_to_entry(map);
10825              entry = entry->vme_next) {
10826                 vm_follow_entry(entry);
10827         }
10828
10829         db_indent -= 2;
10830 }
10831
10832 /*
10833  *      vm_map_print:   [ debug ]
       *
       *      Dumps a whole map given its address as a db_addr_t: the map
       *      header, the pmap/size/ref_count/hint/first_free fields, the
       *      wait_for_space and wiring_required flags with the timestamp,
       *      (when TASK_SWAPPER is configured) res_count and a name for
       *      sw_state, and then every entry through vm_map_entry_print().
       *      No map lock is taken in this routine.
10834  */
10835 void
10836 vm_map_print(
10837         db_addr_t inmap)
10838 {
10839         register vm_map_entry_t entry;
10840         vm_map_t map;
10841 #if TASK_SWAPPER
10842         char *swstate;  /* printable name for map->sw_state */
10843 #endif /* TASK_SWAPPER */
10844
10845         map = (vm_map_t)(long)
10846                 inmap;  /* Make sure we have the right type */
10847
10848         iprintf("task map %08X\n", map);
10849
10850         db_indent += 2;
10851
10852         vm_map_header_print(&map->hdr);
10853
10854         iprintf("pmap = %08X  size = %08X  ref = %d  hint = %08X  first_free = %08X\n",
10855                 map->pmap,
10856                 map->size,
10857                 map->ref_count,
10858                 map->hint,
10859                 map->first_free);
10860
10861         iprintf("%swait_for_space, %swiring_required, timestamp = %d\n",
10862                 (map->wait_for_space ? "" : "!"),
10863                 (map->wiring_required ? "" : "!"),
10864                 map->timestamp);
10865
10866 #if     TASK_SWAPPER
              /* Any state other than MAP_SW_IN / MAP_SW_OUT prints "????". */
10867         switch (map->sw_state) {
10868         case MAP_SW_IN:
10869                 swstate = "SW_IN";
10870                 break;
10871         case MAP_SW_OUT:
10872                 swstate = "SW_OUT";
10873                 break;
10874         default:
10875                 swstate = "????";
10876                 break;
10877         }
10878         iprintf("res = %d, sw_state = %s\n", map->res_count, swstate);
10879 #endif  /* TASK_SWAPPER */
10880
              /* Walk the entry list until it wraps back to the map header. */
10881         for (entry = vm_map_first_entry(map);
10882              entry && entry != vm_map_to_entry(map);
10883              entry = entry->vme_next) {
10884                 vm_map_entry_print(entry);
10885         }
10886
10887         db_indent -= 2;
10888 }
10889
10890 /*
10891  *      Routine:        vm_map_copy_print
10892  *      Purpose:
10893  *              Pretty-print a copy object for ddb.
       *
       *              The argument is the copy object's address as a
       *              db_addr_t.  The type, offset and size are printed
       *              first; the rest depends on the type: an entry list
       *              prints its header and every entry, an object copy
       *              prints the object pointer, and a kernel buffer prints
       *              the buffer pointer and its kalloc size.  An
       *              unrecognised type is labelled "[bad type]" and gets
       *              no type-specific output.
10894  */
10895
10896 void
10897 vm_map_copy_print(
10898         db_addr_t       incopy)
10899 {
10900         vm_map_copy_t copy;
10901         vm_map_entry_t entry;
10902
10903         copy = (vm_map_copy_t)(long)
10904                 incopy; /* Make sure we have the right type */
10905
10906         printf("copy object 0x%x\n", copy);
10907
10908         db_indent += 2;
10909
              /* First pass over the type: print its symbolic name. */
10910         iprintf("type=%d", copy->type);
10911         switch (copy->type) {
10912         case VM_MAP_COPY_ENTRY_LIST:
10913                 printf("[entry_list]");
10914                 break;
10915
10916         case VM_MAP_COPY_OBJECT:
10917                 printf("[object]");
10918                 break;
10919
10920         case VM_MAP_COPY_KERNEL_BUFFER:
10921                 printf("[kernel_buffer]");
10922                 break;
10923
10924         default:
10925                 printf("[bad type]");
10926                 break;
10927         }
10928         printf(", offset=0x%llx", (unsigned long long)copy->offset);
10929         printf(", size=0x%x\n", copy->size);
10930
              /* Second pass over the type: print the type-specific payload. */
10931         switch (copy->type) {
10932         case VM_MAP_COPY_ENTRY_LIST:
10933                 vm_map_header_print(&copy->cpy_hdr);
10934                 for (entry = vm_map_copy_first_entry(copy);
10935                      entry && entry != vm_map_copy_to_entry(copy);
10936                      entry = entry->vme_next) {
10937                         vm_map_entry_print(entry);
10938                 }
10939                 break;
10940
10941         case VM_MAP_COPY_OBJECT:
10942                 iprintf("object=0x%x\n", copy->cpy_object);
10943                 break;
10944
10945         case VM_MAP_COPY_KERNEL_BUFFER:
10946                 iprintf("kernel buffer=0x%x", copy->cpy_kdata);
10947                 printf(", kalloc_size=0x%x\n", copy->cpy_kalloc_size);
10948                 break;
10949
10950         }
10951
10952         db_indent -=2;
10953 }
10954
10955 /*
10956  *      db_vm_map_total_size(map)       [ debug ]
10957  *
10958  *      return the total virtual size (in bytes) of the map
       *
       *      The map is passed as a db_addr_t and cast to vm_map_t.  The
       *      result is the sum of (vme_end - vme_start) over every entry
       *      in the map's list; an empty map yields 0.  No map lock is
       *      taken in this routine.
10959  */
10960 vm_map_size_t
10961 db_vm_map_total_size(
10962         db_addr_t       inmap)
10963 {
10964         vm_map_entry_t  entry;
10965         vm_map_size_t   total;  /* running sum of entry sizes */
10966         vm_map_t map;
10967
10968         map = (vm_map_t)(long)
10969                 inmap;  /* Make sure we have the right type */
10970
10971         total = 0;
              /*
               * Unlike the other debug walkers in this file, this loop does
               * not test "entry" for NULL; it relies on the list wrapping
               * back to the map header.
               */
10972         for (entry = vm_map_first_entry(map);
10973              entry != vm_map_to_entry(map);
10974              entry = entry->vme_next) {
10975                 total += entry->vme_end - entry->vme_start;
10976         }
10977
10978         return total;
10979 }
10980
10981 #endif  /* MACH_KDB */
10982
10983 /*
10984  *      Routine:        vm_map_entry_insert
10985  *
10986  *      Description:    This routine inserts a new vm_entry in a locked map.
       *
       *      A fresh entry is obtained from vm_map_entry_create(map), filled
       *      in from the arguments, linked into the map relative to
       *      insp_entry by vm_map_store_entry_link(), and accounted for by
       *      adding (end - start) to map->size.  The new entry is then
       *      recorded with SAVE_HINT_MAP_WRITE() and returned.
       *
       *      start and end are asserted to be page aligned and insp_entry
       *      is asserted to be non-null.
       *
       *      Fields that have no corresponding argument are set to fixed
       *      values: is_sub_map, needs_wakeup, use_pmap, zero_wired_pages
       *      and used_for_jit are FALSE; user_wired_count and alias are 0.
10987  */
10988 vm_map_entry_t
10989 vm_map_entry_insert(
10990         vm_map_t                map,
10991         vm_map_entry_t          insp_entry,
10992         vm_map_offset_t         start,
10993         vm_map_offset_t         end,
10994         vm_object_t             object,
10995         vm_object_offset_t      offset,
10996         boolean_t               needs_copy,
10997         boolean_t               is_shared,
10998         boolean_t               in_transition,
10999         vm_prot_t               cur_protection,
11000         vm_prot_t               max_protection,
11001         vm_behavior_t           behavior,
11002         vm_inherit_t            inheritance,
11003         unsigned                wired_count,
11004         boolean_t               no_cache,
11005         boolean_t               permanent,
11006         unsigned int            superpage_size)
11007 {
11008         vm_map_entry_t  new_entry;
11009
11010         assert(insp_entry != (vm_map_entry_t)0);
11011
11012         new_entry = vm_map_entry_create(map);
11013
11014         new_entry->vme_start = start;
11015         new_entry->vme_end = end;
11016         assert(page_aligned(new_entry->vme_start));
11017         assert(page_aligned(new_entry->vme_end));
11018
              /* Caller-supplied attributes, plus the fixed defaults. */
11019         new_entry->object.vm_object = object;
11020         new_entry->offset = offset;
11021         new_entry->is_shared = is_shared;
11022         new_entry->is_sub_map = FALSE;
11023         new_entry->needs_copy = needs_copy;
11024         new_entry->in_transition = in_transition;
11025         new_entry->needs_wakeup = FALSE;
11026         new_entry->inheritance = inheritance;
11027         new_entry->protection = cur_protection;
11028         new_entry->max_protection = max_protection;
11029         new_entry->behavior = behavior;
11030         new_entry->wired_count = wired_count;
11031         new_entry->user_wired_count = 0;
11032         new_entry->use_pmap = FALSE;
11033         new_entry->alias = 0;
11034         new_entry->zero_wired_pages = FALSE;
11035         new_entry->no_cache = no_cache;
11036         new_entry->permanent = permanent;
11037         new_entry->superpage_size = superpage_size;
11038         new_entry->used_for_jit = FALSE;
11039
11040         /*
11041          *      Insert the new entry into the list.
11042          */
11043
11044         vm_map_store_entry_link(map, insp_entry, new_entry);
11045         map->size += end - start;
11046
11047         /*
11048          *      Update the free space hint and the lookup hint.
11049          */
11050
11051         SAVE_HINT_MAP_WRITE(map, new_entry);
11052         return new_entry;
11053 }
11054
11055 /*
11056  *      Routine:        vm_map_remap_extract
11057  *
11058  *      Descritpion:    This routine returns a vm_entry list from a map.
11059  */
11060 static kern_return_t
11061 vm_map_remap_extract(
11062         vm_map_t                map,
11063         vm_map_offset_t         addr,
11064         vm_map_size_t           size,
11065         boolean_t               copy,
11066         struct vm_map_header    *map_header,
11067         vm_prot_t               *cur_protection,
11068         vm_prot_t               *max_protection,
11069         /* What, no behavior? */
11070         vm_inherit_t            inheritance,
11071         boolean_t               pageable)
11072 {
11073         kern_return_t           result;
11074         vm_map_size_t           mapped_size;
11075         vm_map_size_t           tmp_size;
11076         vm_map_entry_t          src_entry;     /* result of last map lookup */
11077         vm_map_entry_t          new_entry;
11078         vm_object_offset_t      offset;
11079         vm_map_offset_t         map_address;
11080         vm_map_offset_t         src_start;     /* start of entry to map */
11081         vm_map_offset_t         src_end;       /* end of region to be mapped */
11082         vm_object_t             object;
11083         vm_map_version_t        version;
11084         boolean_t               src_needs_copy;
11085         boolean_t               new_entry_needs_copy;
11086
11087         assert(map != VM_MAP_NULL);
11088         assert(size != 0 && size == vm_map_round_page(size));
11089         assert(inheritance == VM_INHERIT_NONE ||
11090                inheritance == VM_INHERIT_COPY ||
11091                inheritance == VM_INHERIT_SHARE);
11092
11093         /*
11094          *      Compute start and end of region.
11095          */
11096         src_start = vm_map_trunc_page(addr);
11097         src_end = vm_map_round_page(src_start + size);
11098
11099         /*
11100          *      Initialize map_header.
11101          */
11102         map_header->links.next = (struct vm_map_entry *)&map_header->links;
11103         map_header->links.prev = (struct vm_map_entry *)&map_header->links;
11104         map_header->nentries = 0;
11105         map_header->entries_pageable = pageable;
11106
11107         vm_map_store_init( map_header );
11108
11109         *cur_protection = VM_PROT_ALL;
11110         *max_protection = VM_PROT_ALL;
11111
11112         map_address = 0;
11113         mapped_size = 0;
11114         result = KERN_SUCCESS;
11115
11116         /*
11117          *      The specified source virtual space might correspond to
11118          *      multiple map entries, need to loop on them.
11119          */
11120         vm_map_lock(map);
11121         while (mapped_size != size) {
11122                 vm_map_size_t   entry_size;
11123
11124                 /*
11125                  *      Find the beginning of the region.
11126                  */
11127                 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
11128                         result = KERN_INVALID_ADDRESS;
11129                         break;
11130                 }
11131
11132                 if (src_start < src_entry->vme_start ||
11133                     (mapped_size && src_start != src_entry->vme_start)) {
11134                         result = KERN_INVALID_ADDRESS;
11135                         break;
11136                 }
11137
11138                 tmp_size = size - mapped_size;
11139                 if (src_end > src_entry->vme_end)
11140                         tmp_size -= (src_end - src_entry->vme_end);
11141
11142                 entry_size = (vm_map_size_t)(src_entry->vme_end -
11143                                              src_entry->vme_start);
11144
11145                 if(src_entry->is_sub_map) {
11146                         vm_map_reference(src_entry->object.sub_map);
11147                         object = VM_OBJECT_NULL;
11148                 } else {
11149                         object = src_entry->object.vm_object;
11150
11151                         if (object == VM_OBJECT_NULL) {
11152                                 object = vm_object_allocate(entry_size);
11153                                 src_entry->offset = 0;
11154                                 src_entry->object.vm_object = object;
11155                         } else if (object->copy_strategy !=
11156                                    MEMORY_OBJECT_COPY_SYMMETRIC) {
11157                                 /*
11158                                  *      We are already using an asymmetric
11159                                  *      copy, and therefore we already have
11160                                  *      the right object.
11161                                  */
11162                                 assert(!src_entry->needs_copy);
11163                         } else if (src_entry->needs_copy || object->shadowed ||
11164                                    (object->internal && !object->true_share &&
11165                                     !src_entry->is_shared &&
11166                                     object->vo_size > entry_size)) {
11167
11168                                 vm_object_shadow(&src_entry->object.vm_object,
11169                                                  &src_entry->offset,
11170                                                  entry_size);
11171
11172                                 if (!src_entry->needs_copy &&
11173                                     (src_entry->protection & VM_PROT_WRITE)) {
11174                                         vm_prot_t prot;
11175
11176                                         prot = src_entry->protection & ~VM_PROT_WRITE;
11177
11178                                         if (override_nx(map, src_entry->alias) && prot)
11179                                                 prot |= VM_PROT_EXECUTE;
11180
11181                                         if(map->mapped) {
11182                                                 vm_object_pmap_protect(
11183                                                         src_entry->object.vm_object,
11184                                                         src_entry->offset,
11185                                                         entry_size,
11186                                                         PMAP_NULL,
11187                                                         src_entry->vme_start,
11188                                                         prot);
11189                                         } else {
11190                                                 pmap_protect(vm_map_pmap(map),
11191                                                              src_entry->vme_start,
11192                                                              src_entry->vme_end,
11193                                                              prot);
11194                                         }
11195                                 }
11196
11197                                 object = src_entry->object.vm_object;
11198                                 src_entry->needs_copy = FALSE;
11199                         }
11200
11201
11202                         vm_object_lock(object);
11203                         vm_object_reference_locked(object); /* object ref. for new entry */
11204                         if (object->copy_strategy ==
11205                             MEMORY_OBJECT_COPY_SYMMETRIC) {
11206                                 object->copy_strategy =
11207                                         MEMORY_OBJECT_COPY_DELAY;
11208                         }
11209                         vm_object_unlock(object);
11210                 }
11211
11212                 offset = src_entry->offset + (src_start - src_entry->vme_start);
11213
11214                 new_entry = _vm_map_entry_create(map_header);
11215                 vm_map_entry_copy(new_entry, src_entry);
11216                 new_entry->use_pmap = FALSE; /* clr address space specifics */
11217
11218                 new_entry->vme_start = map_address;
11219                 new_entry->vme_end = map_address + tmp_size;
11220                 new_entry->inheritance = inheritance;
11221                 new_entry->offset = offset;
11222
11223                 /*
11224                  * The new region has to be copied now if required.
11225                  */
11226         RestartCopy:
11227                 if (!copy) {
11228                         src_entry->is_shared = TRUE;
11229                         new_entry->is_shared = TRUE;
11230                         if (!(new_entry->is_sub_map))
11231                                 new_entry->needs_copy = FALSE;
11232
11233                 } else if (src_entry->is_sub_map) {
11234                         /* make this a COW sub_map if not already */
11235                         new_entry->needs_copy = TRUE;
11236                         object = VM_OBJECT_NULL;
11237                 } else if (src_entry->wired_count == 0 &&
11238                            vm_object_copy_quickly(&new_entry->object.vm_object,
11239                                                   new_entry->offset,
11240                                                   (new_entry->vme_end -
11241                                                    new_entry->vme_start),
11242                                                   &src_needs_copy,
11243                                                   &new_entry_needs_copy)) {
11244
11245                         new_entry->needs_copy = new_entry_needs_copy;
11246                         new_entry->is_shared = FALSE;
11247
11248                         /*
11249                          * Handle copy_on_write semantics.
11250                          */
11251                         if (src_needs_copy && !src_entry->needs_copy) {
11252                                 vm_prot_t prot;
11253
11254                                 prot = src_entry->protection & ~VM_PROT_WRITE;
11255
11256                                 if (override_nx(map, src_entry->alias) && prot)
11257                                         prot |= VM_PROT_EXECUTE;
11258
11259                                 vm_object_pmap_protect(object,
11260                                                        offset,
11261                                                        entry_size,
11262                                                        ((src_entry->is_shared
11263                                                          || map->mapped) ?
11264                                                         PMAP_NULL : map->pmap),
11265                                                        src_entry->vme_start,
11266                                                        prot);
11267
11268                                 src_entry->needs_copy = TRUE;
11269                         }
11270                         /*
11271                          * Throw away the old object reference of the new entry.
11272                          */
11273                         vm_object_deallocate(object);
11274
11275                 } else {
11276                         new_entry->is_shared = FALSE;
11277
11278                         /*
11279                          * The map can be safely unlocked since we
11280                          * already hold a reference on the object.
11281                          *
11282                          * Record the timestamp of the map for later
11283                          * verification, and unlock the map.
11284                          */
11285                         version.main_timestamp = map->timestamp;
11286                         vm_map_unlock(map);     /* Increments timestamp once! */
11287
11288                         /*
11289                          * Perform the copy.
11290                          */
11291                         if (src_entry->wired_count > 0) {
11292                                 vm_object_lock(object);
11293                                 result = vm_object_copy_slowly(
11294                                         object,
11295                                         offset,
11296                                         entry_size,
11297                                         THREAD_UNINT,
11298                                         &new_entry->object.vm_object);
11299
11300                                 new_entry->offset = 0;
11301                                 new_entry->needs_copy = FALSE;
11302                         } else {
11303                                 result = vm_object_copy_strategically(
11304                                         object,
11305                                         offset,
11306                                         entry_size,
11307                                         &new_entry->object.vm_object,
11308                                         &new_entry->offset,
11309                                         &new_entry_needs_copy);
11310
11311                                 new_entry->needs_copy = new_entry_needs_copy;
11312                         }
11313
11314                         /*
11315                          * Throw away the old object reference of the new entry.
11316                          */
11317                         vm_object_deallocate(object);
11318
11319                         if (result != KERN_SUCCESS &&
11320                             result != KERN_MEMORY_RESTART_COPY) {
11321                                 _vm_map_entry_dispose(map_header, new_entry);
11322                                 break;
11323                         }
11324
11325                         /*
11326                          * Verify that the map has not substantially
11327                          * changed while the copy was being made.
11328                          */
11329
11330                         vm_map_lock(map);
11331                         if (version.main_timestamp + 1 != map->timestamp) {
11332                                 /*
11333                                  * Simple version comparison failed.
11334                                  *
11335                                  * Retry the lookup and verify that the
11336                                  * same object/offset are still present.
11337                                  */
11338                                 vm_object_deallocate(new_entry->
11339                                                      object.vm_object);
11340                                 _vm_map_entry_dispose(map_header, new_entry);
11341                                 if (result == KERN_MEMORY_RESTART_COPY)
11342                                         result = KERN_SUCCESS;
11343                                 continue;
11344                         }
11345
11346                         if (result == KERN_MEMORY_RESTART_COPY) {
11347                                 vm_object_reference(object);
11348                                 goto RestartCopy;
11349                         }
11350                 }
11351
11352                 _vm_map_store_entry_link(map_header,
11353                                    map_header->links.prev, new_entry);
11354
11355                 /*Protections for submap mapping are irrelevant here*/
11356                 if( !src_entry->is_sub_map ) {
11357                         *cur_protection &= src_entry->protection;
11358                         *max_protection &= src_entry->max_protection;
11359                 }
11360                 map_address += tmp_size;
11361                 mapped_size += tmp_size;
11362                 src_start += tmp_size;
11363
11364         } /* end while */
11365
11366         vm_map_unlock(map);
11367         if (result != KERN_SUCCESS) {
11368                 /*
11369                  * Free all allocated elements.
11370                  */
11371                 for (src_entry = map_header->links.next;
11372                      src_entry != (struct vm_map_entry *)&map_header->links;
11373                      src_entry = new_entry) {
11374                         new_entry = src_entry->vme_next;
11375                         _vm_map_store_entry_unlink(map_header, src_entry);
11376                         vm_object_deallocate(src_entry->object.vm_object);
11377                         _vm_map_entry_dispose(map_header, src_entry);
11378                 }
11379         }
11380         return result;
11381 }
11382
11383 /*
11384  *      Routine:        vm_remap
11385  *
11386  *                      Map portion of a task's address space.
11387  *                      Mapped region must not overlap more than
11388  *                      one vm memory object. Protections and
11389  *                      inheritance attributes remain the same
11390  *                      as in the original task and are out parameters.
11391  *                      Source and Target task can be identical
11392  *                      Other attributes are identical as for vm_map()
11393  */
11394 kern_return_t
11395 vm_map_remap(
11396         vm_map_t                target_map,
11397         vm_map_address_t        *address,
11398         vm_map_size_t           size,
11399         vm_map_offset_t         mask,
11400         int                     flags,
11401         vm_map_t                src_map,
11402         vm_map_offset_t         memory_address,
11403         boolean_t               copy,
11404         vm_prot_t               *cur_protection,
11405         vm_prot_t               *max_protection,
11406         vm_inherit_t            inheritance)
11407 {
11408         kern_return_t           result;
11409         vm_map_entry_t          entry;
11410         vm_map_entry_t          insp_entry = VM_MAP_ENTRY_NULL;
11411         vm_map_entry_t          new_entry;
11412         struct vm_map_header    map_header;
11413
11414         if (target_map == VM_MAP_NULL)
11415                 return KERN_INVALID_ARGUMENT;
11416
11417         switch (inheritance) {
11418         case VM_INHERIT_NONE:
11419         case VM_INHERIT_COPY:
11420         case VM_INHERIT_SHARE:
11421                 if (size != 0 && src_map != VM_MAP_NULL)
11422                         break;
11423                 /*FALL THRU*/
11424         default:
11425                 return KERN_INVALID_ARGUMENT;
11426         }
11427
11428         size = vm_map_round_page(size);
11429
11430         result = vm_map_remap_extract(src_map, memory_address,
11431                                       size, copy, &map_header,
11432                                       cur_protection,
11433                                       max_protection,
11434                                       inheritance,
11435                                       target_map->hdr.
11436                                       entries_pageable);
11437
11438         if (result != KERN_SUCCESS) {
11439                 return result;
11440         }
11441
11442         /*
11443          * Allocate/check a range of free virtual address
11444          * space for the target
11445          */
11446         *address = vm_map_trunc_page(*address);
11447         vm_map_lock(target_map);
11448         result = vm_map_remap_range_allocate(target_map, address, size,
11449                                              mask, flags, &insp_entry);
11450
11451         for (entry = map_header.links.next;
11452              entry != (struct vm_map_entry *)&map_header.links;
11453              entry = new_entry) {
11454                 new_entry = entry->vme_next;
11455                 _vm_map_store_entry_unlink(&map_header, entry);
11456                 if (result == KERN_SUCCESS) {
11457                         entry->vme_start += *address;
11458                         entry->vme_end += *address;
11459                         vm_map_store_entry_link(target_map, insp_entry, entry);
11460                         insp_entry = entry;
11461                 } else {
11462                         if (!entry->is_sub_map) {
11463                                 vm_object_deallocate(entry->object.vm_object);
11464                         } else {
11465                                 vm_map_deallocate(entry->object.sub_map);
11466                         }
11467                         _vm_map_entry_dispose(&map_header, entry);
11468                 }
11469         }
11470
11471         if( target_map->disable_vmentry_reuse == TRUE) {
11472                 if( target_map->highest_entry_end < insp_entry->vme_end ){
11473                         target_map->highest_entry_end = insp_entry->vme_end;
11474                 }
11475         }
11476
11477         if (result == KERN_SUCCESS) {
11478                 target_map->size += size;
11479                 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
11480         }
11481         vm_map_unlock(target_map);
11482
11483         if (result == KERN_SUCCESS && target_map->wiring_required)
11484                 result = vm_map_wire(target_map, *address,
11485                                      *address + size, *cur_protection, TRUE);
11486         return result;
11487 }
11488
11489 /*
11490  *      Routine:        vm_map_remap_range_allocate
11491  *
11492  *      Description:
11493  *              Allocate a range in the specified virtual address map.
11494  *              returns the address and the map entry just before the allocated
11495  *              range
11496  *
11497  *      Map must be locked.
11498  */
11499
11500 static kern_return_t
11501 vm_map_remap_range_allocate(
11502         vm_map_t                map,
11503         vm_map_address_t        *address,       /* IN/OUT */
11504         vm_map_size_t           size,
11505         vm_map_offset_t         mask,
11506         int                     flags,
11507         vm_map_entry_t          *map_entry)     /* OUT */
11508 {
11509         vm_map_entry_t  entry;
11510         vm_map_offset_t start;
11511         vm_map_offset_t end;
11512         kern_return_t   kr;
11513
11514 StartAgain: ;
11515
11516         start = *address;
11517
11518         if (flags & VM_FLAGS_ANYWHERE)
11519         {
11520                 /*
11521                  *      Calculate the first possible address.
11522                  */
11523
11524                 if (start < map->min_offset)
11525                         start = map->min_offset;
11526                 if (start > map->max_offset)
11527                         return(KERN_NO_SPACE);
11528
11529                 /*
11530                  *      Look for the first possible address;
11531                  *      if there's already something at this
11532                  *      address, we have to start after it.
11533                  */
11534
11535                 if( map->disable_vmentry_reuse == TRUE) {
11536                         VM_MAP_HIGHEST_ENTRY(map, entry, start);
11537                 } else {
11538                         assert(first_free_is_valid(map));
11539                         if (start == map->min_offset) {
11540                                 if ((entry = map->first_free) != vm_map_to_entry(map))
11541                                         start = entry->vme_end;
11542                         } else {
11543                                 vm_map_entry_t  tmp_entry;
11544                                 if (vm_map_lookup_entry(map, start, &tmp_entry))
11545                                         start = tmp_entry->vme_end;
11546                                 entry = tmp_entry;
11547                         }
11548                 }
11549
11550                 /*
11551                  *      In any case, the "entry" always precedes
11552                  *      the proposed new region throughout the
11553                  *      loop:
11554                  */
11555
11556                 while (TRUE) {
11557                         register vm_map_entry_t next;
11558
11559                         /*
11560                          *      Find the end of the proposed new region.
11561                          *      Be sure we didn't go beyond the end, or
11562                          *      wrap around the address.
11563                          */
11564
11565                         end = ((start + mask) & ~mask);
11566                         if (end < start)
11567                                 return(KERN_NO_SPACE);
11568                         start = end;
11569                         end += size;
11570
11571                         if ((end > map->max_offset) || (end < start)) {
11572                                 if (map->wait_for_space) {
11573                                         if (size <= (map->max_offset -
11574                                                      map->min_offset)) {
11575                                                 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
11576                                                 vm_map_unlock(map);
11577                                                 thread_block(THREAD_CONTINUE_NULL);
11578                                                 vm_map_lock(map);
11579                                                 goto StartAgain;
11580                                         }
11581                                 }
11582
11583                                 return(KERN_NO_SPACE);
11584                         }
11585
11586                         /*
11587                          *      If there are no more entries, we must win.
11588                          */
11589
11590                         next = entry->vme_next;
11591                         if (next == vm_map_to_entry(map))
11592                                 break;
11593
11594                         /*
11595                          *      If there is another entry, it must be
11596                          *      after the end of the potential new region.
11597                          */
11598
11599                         if (next->vme_start >= end)
11600                                 break;
11601
11602                         /*
11603                          *      Didn't fit -- move to the next entry.
11604                          */
11605
11606                         entry = next;
11607                         start = entry->vme_end;
11608                 }
11609                 *address = start;
11610         } else {
11611                 vm_map_entry_t          temp_entry;
11612
11613                 /*
11614                  *      Verify that:
11615                  *              the address doesn't itself violate
11616                  *              the mask requirement.
11617                  */
11618
11619                 if ((start & mask) != 0)
11620                         return(KERN_NO_SPACE);
11621
11622
11623                 /*
11624                  *      ...     the address is within bounds
11625                  */
11626
11627                 end = start + size;
11628
11629                 if ((start < map->min_offset) ||
11630                     (end > map->max_offset) ||
11631                     (start >= end)) {
11632                         return(KERN_INVALID_ADDRESS);
11633                 }
11634
11635                 /*
11636                  * If we're asked to overwrite whatever was mapped in that
11637                  * range, first deallocate that range.
11638                  */
11639                 if (flags & VM_FLAGS_OVERWRITE) {
11640                         vm_map_t zap_map;
11641
11642                         /*
11643                          * We use a "zap_map" to avoid having to unlock
11644                          * the "map" in vm_map_delete(), which would compromise
11645                          * the atomicity of the "deallocate" and then "remap"
11646                          * combination.
11647                          */
11648                         zap_map = vm_map_create(PMAP_NULL,
11649                                                 start,
11650                                                 end - start,
11651                                                 map->hdr.entries_pageable);
11652                         if (zap_map == VM_MAP_NULL) {
11653                                 return KERN_RESOURCE_SHORTAGE;
11654                         }
11655
11656                         kr = vm_map_delete(map, start, end,
11657                                            VM_MAP_REMOVE_SAVE_ENTRIES,
11658                                            zap_map);
11659                         if (kr == KERN_SUCCESS) {
11660                                 vm_map_destroy(zap_map,
11661                                                VM_MAP_REMOVE_NO_PMAP_CLEANUP);
11662                                 zap_map = VM_MAP_NULL;
11663                         }
11664                 }
11665
11666                 /*
11667                  *      ...     the starting address isn't allocated
11668                  */
11669
11670                 if (vm_map_lookup_entry(map, start, &temp_entry))
11671                         return(KERN_NO_SPACE);
11672
11673                 entry = temp_entry;
11674
11675                 /*
11676                  *      ...     the next region doesn't overlap the
11677                  *              end point.
11678                  */
11679
11680                 if ((entry->vme_next != vm_map_to_entry(map)) &&
11681                     (entry->vme_next->vme_start < end))
11682                         return(KERN_NO_SPACE);
11683         }
11684         *map_entry = entry;
11685         return(KERN_SUCCESS);
11686 }
11687
11688 /*
11689  *      vm_map_switch:
11690  *
11691  *      Set the address map for the current thread to the specified map
11692  */
11693
11694 vm_map_t
11695 vm_map_switch(
11696         vm_map_t        map)
11697 {
11698         int             mycpu;
11699         thread_t        thread = current_thread();
11700         vm_map_t        oldmap = thread->map;
11701
11702         mp_disable_preemption();
11703         mycpu = cpu_number();
11704
11705         /*
11706          *      Deactivate the current map and activate the requested map
11707          */
11708         PMAP_SWITCH_USER(thread, map, mycpu);
11709
11710         mp_enable_preemption();
11711         return(oldmap);
11712 }
11713
11714
11715 /*
11716  *      Routine:        vm_map_write_user
11717  *
11718  *      Description:
11719  *              Copy out data from a kernel space into space in the
11720  *              destination map. The space must already exist in the
11721  *              destination map.
11722  *              NOTE:  This routine should only be called by threads
11723  *              which can block on a page fault. i.e. kernel mode user
11724  *              threads.
11725  *
11726  */
11727 kern_return_t
11728 vm_map_write_user(
11729         vm_map_t                map,
11730         void                    *src_p,
11731         vm_map_address_t        dst_addr,
11732         vm_size_t               size)
11733 {
11734         kern_return_t   kr = KERN_SUCCESS;
11735
11736         if(current_map() == map) {
11737                 if (copyout(src_p, dst_addr, size)) {
11738                         kr = KERN_INVALID_ADDRESS;
11739                 }
11740         } else {
11741                 vm_map_t        oldmap;
11742
11743                 /* take on the identity of the target map while doing */
11744                 /* the transfer */
11745
11746                 vm_map_reference(map);
11747                 oldmap = vm_map_switch(map);
11748                 if (copyout(src_p, dst_addr, size)) {
11749                         kr = KERN_INVALID_ADDRESS;
11750                 }
11751                 vm_map_switch(oldmap);
11752                 vm_map_deallocate(map);
11753         }
11754         return kr;
11755 }
11756
11757 /*
11758  *      Routine:        vm_map_read_user
11759  *
11760  *      Description:
11761  *              Copy in data from a user space source map into the
11762  *              kernel map. The space must already exist in the
11763  *              kernel map.
11764  *              NOTE:  This routine should only be called by threads
11765  *              which can block on a page fault. i.e. kernel mode user
11766  *              threads.
11767  *
11768  */
11769 kern_return_t
11770 vm_map_read_user(
11771         vm_map_t                map,
11772         vm_map_address_t        src_addr,
11773         void                    *dst_p,
11774         vm_size_t               size)
11775 {
11776         kern_return_t   kr = KERN_SUCCESS;
11777
11778         if(current_map() == map) {
11779                 if (copyin(src_addr, dst_p, size)) {
11780                         kr = KERN_INVALID_ADDRESS;
11781                 }
11782         } else {
11783                 vm_map_t        oldmap;
11784
11785                 /* take on the identity of the target map while doing */
11786                 /* the transfer */
11787
11788                 vm_map_reference(map);
11789                 oldmap = vm_map_switch(map);
11790                 if (copyin(src_addr, dst_p, size)) {
11791                         kr = KERN_INVALID_ADDRESS;
11792                 }
11793                 vm_map_switch(oldmap);
11794                 vm_map_deallocate(map);
11795         }
11796         return kr;
11797 }
11798
11799
11800 /*
11801  *      vm_map_check_protection:
11802  *
11803  *      Assert that the target map allows the specified
11804  *      privilege on the entire address region given.
11805  *      The entire region must be allocated.
11806  */
11807 boolean_t
11808 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
11809                         vm_map_offset_t end, vm_prot_t protection)
11810 {
11811         vm_map_entry_t entry;
11812         vm_map_entry_t tmp_entry;
11813
11814         vm_map_lock(map);
11815
11816         if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
11817         {
11818                 vm_map_unlock(map);
11819                 return (FALSE);
11820         }
11821
11822         if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11823                 vm_map_unlock(map);
11824                 return(FALSE);
11825         }
11826
11827         entry = tmp_entry;
11828
11829         while (start < end) {
11830                 if (entry == vm_map_to_entry(map)) {
11831                         vm_map_unlock(map);
11832                         return(FALSE);
11833                 }
11834
11835                 /*
11836                  *      No holes allowed!
11837                  */
11838
11839                 if (start < entry->vme_start) {
11840                         vm_map_unlock(map);
11841                         return(FALSE);
11842                 }
11843
11844                 /*
11845                  * Check protection associated with entry.
11846                  */
11847
11848                 if ((entry->protection & protection) != protection) {
11849                         vm_map_unlock(map);
11850                         return(FALSE);
11851                 }
11852
11853                 /* go to next entry */
11854
11855                 start = entry->vme_end;
11856                 entry = entry->vme_next;
11857         }
11858         vm_map_unlock(map);
11859         return(TRUE);
11860 }
11861
11862 kern_return_t
11863 vm_map_purgable_control(
11864         vm_map_t                map,
11865         vm_map_offset_t         address,
11866         vm_purgable_t           control,
11867         int                     *state)
11868 {
11869         vm_map_entry_t          entry;
11870         vm_object_t             object;
11871         kern_return_t           kr;
11872
11873         /*
11874          * Vet all the input parameters and current type and state of the
11875          * underlaying object.  Return with an error if anything is amiss.
11876          */
11877         if (map == VM_MAP_NULL)
11878                 return(KERN_INVALID_ARGUMENT);
11879
11880         if (control != VM_PURGABLE_SET_STATE &&
11881             control != VM_PURGABLE_GET_STATE &&
11882             control != VM_PURGABLE_PURGE_ALL)
11883                 return(KERN_INVALID_ARGUMENT);
11884
11885         if (control == VM_PURGABLE_PURGE_ALL) {
11886                 vm_purgeable_object_purge_all();
11887                 return KERN_SUCCESS;
11888         }
11889
11890         if (control == VM_PURGABLE_SET_STATE &&
11891             (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
11892              ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
11893                 return(KERN_INVALID_ARGUMENT);
11894
11895         vm_map_lock_read(map);
11896
11897         if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
11898
11899                 /*
11900                  * Must pass a valid non-submap address.
11901                  */
11902                 vm_map_unlock_read(map);
11903                 return(KERN_INVALID_ADDRESS);
11904         }
11905
11906         if ((entry->protection & VM_PROT_WRITE) == 0) {
11907                 /*
11908                  * Can't apply purgable controls to something you can't write.
11909                  */
11910                 vm_map_unlock_read(map);
11911                 return(KERN_PROTECTION_FAILURE);
11912         }
11913
11914         object = entry->object.vm_object;
11915         if (object == VM_OBJECT_NULL) {
11916                 /*
11917                  * Object must already be present or it can't be purgable.
11918                  */
11919                 vm_map_unlock_read(map);
11920                 return KERN_INVALID_ARGUMENT;
11921         }
11922
11923         vm_object_lock(object);
11924
11925         if (entry->offset != 0 ||
11926             entry->vme_end - entry->vme_start != object->vo_size) {
11927                 /*
11928                  * Can only apply purgable controls to the whole (existing)
11929                  * object at once.
11930                  */
11931                 vm_map_unlock_read(map);
11932                 vm_object_unlock(object);
11933                 return KERN_INVALID_ARGUMENT;
11934         }
11935
11936         vm_map_unlock_read(map);
11937
11938         kr = vm_object_purgable_control(object, control, state);
11939
11940         vm_object_unlock(object);
11941
11942         return kr;
11943 }
11944
11945 kern_return_t
11946 vm_map_page_query_internal(
11947         vm_map_t        target_map,
11948         vm_map_offset_t offset,
11949         int             *disposition,
11950         int             *ref_count)
11951 {
11952         kern_return_t                   kr;
11953         vm_page_info_basic_data_t       info;
11954         mach_msg_type_number_t          count;
11955
11956         count = VM_PAGE_INFO_BASIC_COUNT;
11957         kr = vm_map_page_info(target_map,
11958                               offset,
11959                               VM_PAGE_INFO_BASIC,
11960                               (vm_page_info_t) &info,
11961                               &count);
11962         if (kr == KERN_SUCCESS) {
11963                 *disposition = info.disposition;
11964                 *ref_count = info.ref_count;
11965         } else {
11966                 *disposition = 0;
11967                 *ref_count = 0;
11968         }
11969
11970         return kr;
11971 }
11972
11973 kern_return_t
11974 vm_map_page_info(
11975         vm_map_t                map,
11976         vm_map_offset_t         offset,
11977         vm_page_info_flavor_t   flavor,
11978         vm_page_info_t          info,
11979         mach_msg_type_number_t  *count)
11980 {
11981         vm_map_entry_t          map_entry;
11982         vm_object_t             object;
11983         vm_page_t               m;
11984         kern_return_t           kr;
11985         kern_return_t           retval = KERN_SUCCESS;
11986         boolean_t               top_object;
11987         int                     disposition;
11988         int                     ref_count;
11989         vm_object_id_t          object_id;
11990         vm_page_info_basic_t    basic_info;
11991         int                     depth;
11992         vm_map_offset_t         offset_in_page;
11993
11994         switch (flavor) {
11995         case VM_PAGE_INFO_BASIC:
11996                 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
11997                         /*
11998                          * The "vm_page_info_basic_data" structure was not
11999                          * properly padded, so allow the size to be off by
12000                          * one to maintain backwards binary compatibility...
12001                          */
12002                         if (*count != VM_PAGE_INFO_BASIC_COUNT - 1)
12003                                 return KERN_INVALID_ARGUMENT;
12004                 }
12005                 break;
12006         default:
12007                 return KERN_INVALID_ARGUMENT;
12008         }
12009
12010         disposition = 0;
12011         ref_count = 0;
12012         object_id = 0;
12013         top_object = TRUE;
12014         depth = 0;
12015
12016         retval = KERN_SUCCESS;
12017         offset_in_page = offset & PAGE_MASK;
12018         offset = vm_map_trunc_page(offset);
12019
12020         vm_map_lock_read(map);
12021
12022         /*
12023          * First, find the map entry covering "offset", going down
12024          * submaps if necessary.
12025          */
12026         for (;;) {
12027                 if (!vm_map_lookup_entry(map, offset, &map_entry)) {
12028                         vm_map_unlock_read(map);
12029                         return KERN_INVALID_ADDRESS;
12030                 }
12031                 /* compute offset from this map entry's start */
12032                 offset -= map_entry->vme_start;
12033                 /* compute offset into this map entry's object (or submap) */
12034                 offset += map_entry->offset;
12035
12036                 if (map_entry->is_sub_map) {
12037                         vm_map_t sub_map;
12038
12039                         sub_map = map_entry->object.sub_map;
12040                         vm_map_lock_read(sub_map);
12041                         vm_map_unlock_read(map);
12042
12043                         map = sub_map;
12044
12045                         ref_count = MAX(ref_count, map->ref_count);
12046                         continue;
12047                 }
12048                 break;
12049         }
12050
12051         object = map_entry->object.vm_object;
12052         if (object == VM_OBJECT_NULL) {
12053                 /* no object -> no page */
12054                 vm_map_unlock_read(map);
12055                 goto done;
12056         }
12057
12058         vm_object_lock(object);
12059         vm_map_unlock_read(map);
12060
12061         /*
12062          * Go down the VM object shadow chain until we find the page
12063          * we're looking for.
12064          */
12065         for (;;) {
12066                 ref_count = MAX(ref_count, object->ref_count);
12067
12068                 m = vm_page_lookup(object, offset);
12069
12070                 if (m != VM_PAGE_NULL) {
12071                         disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
12072                         break;
12073                 } else {
12074 #if MACH_PAGEMAP
12075                         if (object->existence_map) {
12076                                 if (vm_external_state_get(object->existence_map,
12077                                                           offset) ==
12078                                     VM_EXTERNAL_STATE_EXISTS) {
12079                                         /*
12080                                          * this page has been paged out
12081                                          */
12082                                         disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
12083                                         break;
12084                                 }
12085                         } else
12086 #endif
12087                         {
12088                                 if (object->internal &&
12089                                     object->alive &&
12090                                     !object->terminating &&
12091                                     object->pager_ready) {
12092
12093                                         memory_object_t pager;
12094
12095                                         vm_object_paging_begin(object);
12096                                         pager = object->pager;
12097                                         vm_object_unlock(object);
12098
12099                                         /*
12100                                          * Ask the default pager if
12101                                          * it has this page.
12102                                          */
12103                                         kr = memory_object_data_request(
12104                                                 pager,
12105                                                 offset + object->paging_offset,
12106                                                 0, /* just poke the pager */
12107                                                 VM_PROT_READ,
12108                                                 NULL);
12109
12110                                         vm_object_lock(object);
12111                                         vm_object_paging_end(object);
12112
12113                                         if (kr == KERN_SUCCESS) {
12114                                                 /* the default pager has it */
12115                                                 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
12116                                                 break;
12117                                         }
12118                                 }
12119                         }
12120
12121                         if (object->shadow != VM_OBJECT_NULL) {
12122                                 vm_object_t shadow;
12123
12124                                 offset += object->vo_shadow_offset;
12125                                 shadow = object->shadow;
12126
12127                                 vm_object_lock(shadow);
12128                                 vm_object_unlock(object);
12129
12130                                 object = shadow;
12131                                 top_object = FALSE;
12132                                 depth++;
12133                         } else {
12134 //                              if (!object->internal)
12135 //                                      break;
12136 //                              retval = KERN_FAILURE;
12137 //                              goto done_with_object;
12138                                 break;
12139                         }
12140                 }
12141         }
12142         /* The ref_count is not strictly accurate, it measures the number   */
12143         /* of entities holding a ref on the object, they may not be mapping */
12144         /* the object or may not be mapping the section holding the         */
12145         /* target page but its still a ball park number and though an over- */
12146         /* count, it picks up the copy-on-write cases                       */
12147
12148         /* We could also get a picture of page sharing from pmap_attributes */
12149         /* but this would under count as only faulted-in mappings would     */
12150         /* show up.                                                         */
12151
12152         if (top_object == TRUE && object->shadow)
12153                 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
12154
12155         if (! object->internal)
12156                 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
12157
12158         if (m == VM_PAGE_NULL)
12159                 goto done_with_object;
12160
12161         if (m->fictitious) {
12162                 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
12163                 goto done_with_object;
12164         }
12165         if (m->dirty || pmap_is_modified(m->phys_page))
12166                 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
12167
12168         if (m->reference || pmap_is_referenced(m->phys_page))
12169                 disposition |= VM_PAGE_QUERY_PAGE_REF;
12170
12171         if (m->speculative)
12172                 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
12173
12174         if (m->cs_validated)
12175                 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
12176         if (m->cs_tainted)
12177                 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
12178
12179 done_with_object:
12180         vm_object_unlock(object);
12181 done:
12182
12183         switch (flavor) {
12184         case VM_PAGE_INFO_BASIC:
12185                 basic_info = (vm_page_info_basic_t) info;
12186                 basic_info->disposition = disposition;
12187                 basic_info->ref_count = ref_count;
12188                 basic_info->object_id = (vm_object_id_t) (uintptr_t) object;
12189                 basic_info->offset =
12190                         (memory_object_offset_t) offset + offset_in_page;
12191                 basic_info->depth = depth;
12192                 break;
12193         }
12194
12195         return retval;
12196 }
12197
12198 /*
12199  *      vm_map_msync
12200  *
12201  *      Synchronises the memory range specified with its backing store
12202  *      image by either flushing or cleaning the contents to the appropriate
12203  *      memory manager engaging in a memory object synchronize dialog with
12204  *      the manager.  The client doesn't return until the manager issues
12205  *      m_o_s_completed message.  MIG Magically converts user task parameter
12206  *      to the task's address map.
12207  *
12208  *      interpretation of sync_flags
12209  *      VM_SYNC_INVALIDATE      - discard pages, only return precious
12210  *                                pages to manager.
12211  *
12212  *      VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
12213  *                              - discard pages, write dirty or precious
12214  *                                pages back to memory manager.
12215  *
12216  *      VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
12217  *                              - write dirty or precious pages back to
12218  *                                the memory manager.
12219  *
12220  *      VM_SYNC_CONTIGUOUS      - does everything normally, but if there
12221  *                                is a hole in the region, and we would
12222  *                                have returned KERN_SUCCESS, return
12223  *                                KERN_INVALID_ADDRESS instead.
12224  *
12225  *      NOTE
12226  *      The memory object attributes have not yet been implemented, this
12227  *      function will have to deal with the invalidate attribute
12228  *
12229  *      RETURNS
12230  *      KERN_INVALID_TASK               Bad task parameter
12231  *      KERN_INVALID_ARGUMENT           both sync and async were specified.
12232  *      KERN_SUCCESS                    The usual.
12233  *      KERN_INVALID_ADDRESS            There was a hole in the region.
12234  */
12235
12236 kern_return_t
12237 vm_map_msync(
12238         vm_map_t                map,
12239         vm_map_address_t        address,
12240         vm_map_size_t           size,
12241         vm_sync_t               sync_flags)
12242 {
12243         msync_req_t             msr;
12244         msync_req_t             new_msr;
12245         queue_chain_t           req_q;  /* queue of requests for this msync */
12246         vm_map_entry_t          entry;
12247         vm_map_size_t           amount_left;
12248         vm_object_offset_t      offset;
12249         boolean_t               do_sync_req;
12250         boolean_t               had_hole = FALSE;
12251         memory_object_t         pager;
12252
12253         if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
12254             (sync_flags & VM_SYNC_SYNCHRONOUS))
12255                 return(KERN_INVALID_ARGUMENT);
12256
12257         /*
12258          * align address and size on page boundaries
12259          */
12260         size = vm_map_round_page(address + size) - vm_map_trunc_page(address);
12261         address = vm_map_trunc_page(address);
12262
12263         if (map == VM_MAP_NULL)
12264                 return(KERN_INVALID_TASK);
12265
12266         if (size == 0)
12267                 return(KERN_SUCCESS);
12268
12269         queue_init(&req_q);
12270         amount_left = size;
12271
12272         while (amount_left > 0) {
12273                 vm_object_size_t        flush_size;
12274                 vm_object_t             object;
12275
12276                 vm_map_lock(map);
12277                 if (!vm_map_lookup_entry(map,
12278                                          vm_map_trunc_page(address), &entry)) {
12279
12280                         vm_map_size_t   skip;
12281
12282                         /*
12283                          * hole in the address map.
12284                          */
12285                         had_hole = TRUE;
12286
12287                         /*
12288                          * Check for empty map.
12289                          */
12290                         if (entry == vm_map_to_entry(map) &&
12291                             entry->vme_next == entry) {
12292                                 vm_map_unlock(map);
12293                                 break;
12294                         }
12295                         /*
12296                          * Check that we don't wrap and that
12297                          * we have at least one real map entry.
12298                          */
12299                         if ((map->hdr.nentries == 0) ||
12300                             (entry->vme_next->vme_start < address)) {
12301                                 vm_map_unlock(map);
12302                                 break;
12303                         }
12304                         /*
12305                          * Move up to the next entry if needed
12306                          */
12307                         skip = (entry->vme_next->vme_start - address);
12308                         if (skip >= amount_left)
12309                                 amount_left = 0;
12310                         else
12311                                 amount_left -= skip;
12312                         address = entry->vme_next->vme_start;
12313                         vm_map_unlock(map);
12314                         continue;
12315                 }
12316
12317                 offset = address - entry->vme_start;
12318
12319                 /*
12320                  * do we have more to flush than is contained in this
12321                  * entry ?
12322                  */
12323                 if (amount_left + entry->vme_start + offset > entry->vme_end) {
12324                         flush_size = entry->vme_end -
12325                                 (entry->vme_start + offset);
12326                 } else {
12327                         flush_size = amount_left;
12328                 }
12329                 amount_left -= flush_size;
12330                 address += flush_size;
12331
12332                 if (entry->is_sub_map == TRUE) {
12333                         vm_map_t        local_map;
12334                         vm_map_offset_t local_offset;
12335
12336                         local_map = entry->object.sub_map;
12337                         local_offset = entry->offset;
12338                         vm_map_unlock(map);
12339                         if (vm_map_msync(
12340                                     local_map,
12341                                     local_offset,
12342                                     flush_size,
12343                                     sync_flags) == KERN_INVALID_ADDRESS) {
12344                                 had_hole = TRUE;
12345                         }
12346                         continue;
12347                 }
12348                 object = entry->object.vm_object;
12349
12350                 /*
12351                  * We can't sync this object if the object has not been
12352                  * created yet
12353                  */
12354                 if (object == VM_OBJECT_NULL) {
12355                         vm_map_unlock(map);
12356                         continue;
12357                 }
12358                 offset += entry->offset;
12359
12360                 vm_object_lock(object);
12361
12362                 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
12363                         int kill_pages = 0;
12364                         boolean_t reusable_pages = FALSE;
12365
12366                         if (sync_flags & VM_SYNC_KILLPAGES) {
12367                                 if (object->ref_count == 1 && !object->shadow)
12368                                         kill_pages = 1;
12369                                 else
12370                                         kill_pages = -1;
12371                         }
12372                         if (kill_pages != -1)
12373                                 vm_object_deactivate_pages(object, offset,
12374                                                            (vm_object_size_t)flush_size, kill_pages, reusable_pages);
12375                         vm_object_unlock(object);
12376                         vm_map_unlock(map);
12377                         continue;
12378                 }
12379                 /*
12380                  * We can't sync this object if there isn't a pager.
12381                  * Don't bother to sync internal objects, since there can't
12382                  * be any "permanent" storage for these objects anyway.
12383                  */
12384                 if ((object->pager == MEMORY_OBJECT_NULL) ||
12385                     (object->internal) || (object->private)) {
12386                         vm_object_unlock(object);
12387                         vm_map_unlock(map);
12388                         continue;
12389                 }
12390                 /*
12391                  * keep reference on the object until syncing is done
12392                  */
12393                 vm_object_reference_locked(object);
12394                 vm_object_unlock(object);
12395
12396                 vm_map_unlock(map);
12397
12398                 do_sync_req = vm_object_sync(object,
12399                                              offset,
12400                                              flush_size,
12401                                              sync_flags & VM_SYNC_INVALIDATE,
12402                                              ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
12403                                               (sync_flags & VM_SYNC_ASYNCHRONOUS)),
12404                                              sync_flags & VM_SYNC_SYNCHRONOUS);
12405                 /*
12406                  * only send a m_o_s if we returned pages or if the entry
12407                  * is writable (ie dirty pages may have already been sent back)
12408                  */
12409                 if (!do_sync_req) {
12410                         if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
12411                                 /*
12412                                  * clear out the clustering and read-ahead hints
12413                                  */
12414                                 vm_object_lock(object);
12415
12416                                 object->pages_created = 0;
12417                                 object->pages_used = 0;
12418                                 object->sequential = 0;
12419                                 object->last_alloc = 0;
12420
12421                                 vm_object_unlock(object);
12422                         }
12423                         vm_object_deallocate(object);
12424                         continue;
12425                 }
12426                 msync_req_alloc(new_msr);
12427
12428                 vm_object_lock(object);
12429                 offset += object->paging_offset;
12430
12431                 new_msr->offset = offset;
12432                 new_msr->length = flush_size;
12433                 new_msr->object = object;
12434                 new_msr->flag = VM_MSYNC_SYNCHRONIZING;
12435         re_iterate:
12436
12437                 /*
12438                  * We can't sync this object if there isn't a pager.  The
12439                  * pager can disappear anytime we're not holding the object
12440                  * lock.  So this has to be checked anytime we goto re_iterate.
12441                  */
12442
12443                 pager = object->pager;
12444
12445                 if (pager == MEMORY_OBJECT_NULL) {
12446                         vm_object_unlock(object);
12447                         vm_object_deallocate(object);
12448                         continue;
12449                 }
12450
12451                 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
12452                         /*
12453                          * need to check for overlapping entry, if found, wait
12454                          * on overlapping msr to be done, then reiterate
12455                          */
12456                         msr_lock(msr);
12457                         if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
12458                             ((offset >= msr->offset &&
12459                               offset < (msr->offset + msr->length)) ||
12460                              (msr->offset >= offset &&
12461                               msr->offset < (offset + flush_size))))
12462                         {
12463                                 assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
12464                                 msr_unlock(msr);
12465                                 vm_object_unlock(object);
12466                                 thread_block(THREAD_CONTINUE_NULL);
12467                                 vm_object_lock(object);
12468                                 goto re_iterate;
12469                         }
12470                         msr_unlock(msr);
12471                 }/* queue_iterate */
12472
12473                 queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
12474
12475                 vm_object_paging_begin(object);
12476                 vm_object_unlock(object);
12477
12478                 queue_enter(&req_q, new_msr, msync_req_t, req_q);
12479
12480                 (void) memory_object_synchronize(
12481                         pager,
12482                         offset,
12483                         flush_size,
12484                         sync_flags & ~VM_SYNC_CONTIGUOUS);
12485
12486                 vm_object_lock(object);
12487                 vm_object_paging_end(object);
12488                 vm_object_unlock(object);
12489         }/* while */
12490
12491         /*
12492          * wait for memory_object_sychronize_completed messages from pager(s)
12493          */
12494
12495         while (!queue_empty(&req_q)) {
12496                 msr = (msync_req_t)queue_first(&req_q);
12497                 msr_lock(msr);
12498                 while(msr->flag != VM_MSYNC_DONE) {
12499                         assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
12500                         msr_unlock(msr);
12501                         thread_block(THREAD_CONTINUE_NULL);
12502                         msr_lock(msr);
12503                 }/* while */
12504                 queue_remove(&req_q, msr, msync_req_t, req_q);
12505                 msr_unlock(msr);
12506                 vm_object_deallocate(msr->object);
12507                 msync_req_free(msr);
12508         }/* queue_iterate */
12509
12510         /* for proper msync() behaviour */
12511         if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
12512                 return(KERN_INVALID_ADDRESS);
12513
12514         return(KERN_SUCCESS);
12515 }/* vm_msync */
12516
12517 /*
12518  *      Routine:        convert_port_entry_to_map
12519  *      Purpose:
12520  *              Convert from a port specifying an entry or a task
12521  *              to a map. Doesn't consume the port ref; produces a map ref,
12522  *              which may be null.  Unlike convert_port_to_map, the
12523  *              port may be task or a named entry backed.
12524  *      Conditions:
12525  *              Nothing locked.
12526  */
12527
12528
12529 vm_map_t
12530 convert_port_entry_to_map(
12531         ipc_port_t      port)
12532 {
12533         vm_map_t map;
12534         vm_named_entry_t        named_entry;
12535         uint32_t        try_failed_count = 0;
12536
12537         if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
12538                 while(TRUE) {
12539                         ip_lock(port);
12540                         if(ip_active(port) && (ip_kotype(port)
12541                                                == IKOT_NAMED_ENTRY)) {
12542                                 named_entry =
12543                                         (vm_named_entry_t)port->ip_kobject;
12544                                 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
12545                                         ip_unlock(port);
12546
12547                                         try_failed_count++;
12548                                         mutex_pause(try_failed_count);
12549                                         continue;
12550                                 }
12551                                 named_entry->ref_count++;
12552                                 lck_mtx_unlock(&(named_entry)->Lock);
12553                                 ip_unlock(port);
12554                                 if ((named_entry->is_sub_map) &&
12555                                     (named_entry->protection
12556                                      & VM_PROT_WRITE)) {
12557                                         map = named_entry->backing.map;
12558                                 } else {
12559                                         mach_destroy_memory_entry(port);
12560                                         return VM_MAP_NULL;
12561                                 }
12562                                 vm_map_reference_swap(map);
12563                                 mach_destroy_memory_entry(port);
12564                                 break;
12565                         }
12566                         else
12567                                 return VM_MAP_NULL;
12568                 }
12569         }
12570         else
12571                 map = convert_port_to_map(port);
12572
12573         return map;
12574 }
12575
12576 /*
12577  *      Routine:        convert_port_entry_to_object
12578  *      Purpose:
12579  *              Convert from a port specifying a named entry to an
12580  *              object. Doesn't consume the port ref; produces a map ref,
12581  *              which may be null.
12582  *      Conditions:
12583  *              Nothing locked.
12584  */
12585
12586
12587 vm_object_t
12588 convert_port_entry_to_object(
12589         ipc_port_t      port)
12590 {
12591         vm_object_t object;
12592         vm_named_entry_t        named_entry;
12593         uint32_t        try_failed_count = 0;
12594
12595         if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
12596                 while(TRUE) {
12597                         ip_lock(port);
12598                         if(ip_active(port) && (ip_kotype(port)
12599                                                == IKOT_NAMED_ENTRY)) {
12600                                 named_entry =
12601                                         (vm_named_entry_t)port->ip_kobject;
12602                                 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
12603                                         ip_unlock(port);
12604
12605                                         try_failed_count++;
12606                                         mutex_pause(try_failed_count);
12607                                         continue;
12608                                 }
12609                                 named_entry->ref_count++;
12610                                 lck_mtx_unlock(&(named_entry)->Lock);
12611                                 ip_unlock(port);
12612                                 if ((!named_entry->is_sub_map) &&
12613                                     (!named_entry->is_pager) &&
12614                                     (named_entry->protection
12615                                      & VM_PROT_WRITE)) {
12616                                         object = named_entry->backing.object;
12617                                 } else {
12618                                         mach_destroy_memory_entry(port);
12619                                         return (vm_object_t)NULL;
12620                                 }
12621                                 vm_object_reference(named_entry->backing.object);
12622                                 mach_destroy_memory_entry(port);
12623                                 break;
12624                         }
12625                         else
12626                                 return (vm_object_t)NULL;
12627                 }
12628         } else {
12629                 return (vm_object_t)NULL;
12630         }
12631
12632         return object;
12633 }
12634
12635 /*
12636  * Export routines to other components for the things we access locally through
12637  * macros.
12638  */
12639 #undef current_map
12640 vm_map_t
12641 current_map(void)
12642 {
12643         return (current_map_fast());
12644 }
12645
12646 /*
12647  *      vm_map_reference:
12648  *
12649  *      Most code internal to the osfmk will go through a
12650  *      macro defining this.  This is always here for the
12651  *      use of other kernel components.
12652  */
12653 #undef vm_map_reference
12654 void
12655 vm_map_reference(
12656         register vm_map_t       map)
12657 {
12658         if (map == VM_MAP_NULL)
12659                 return;
12660
12661         lck_mtx_lock(&map->s_lock);
12662 #if     TASK_SWAPPER
12663         assert(map->res_count > 0);
12664         assert(map->ref_count >= map->res_count);
12665         map->res_count++;
12666 #endif
12667         map->ref_count++;
12668         lck_mtx_unlock(&map->s_lock);
12669 }
12670
12671 /*
12672  *      vm_map_deallocate:
12673  *
12674  *      Removes a reference from the specified map,
12675  *      destroying it if no references remain.
12676  *      The map should not be locked.
12677  */
12678 void
12679 vm_map_deallocate(
12680         register vm_map_t       map)
12681 {
12682         unsigned int            ref;
12683
12684         if (map == VM_MAP_NULL)
12685                 return;
12686
12687         lck_mtx_lock(&map->s_lock);
12688         ref = --map->ref_count;
12689         if (ref > 0) {
12690                 vm_map_res_deallocate(map);
12691                 lck_mtx_unlock(&map->s_lock);
12692                 return;
12693         }
12694         assert(map->ref_count == 0);
12695         lck_mtx_unlock(&map->s_lock);
12696
12697 #if     TASK_SWAPPER
12698         /*
12699          * The map residence count isn't decremented here because
12700          * the vm_map_delete below will traverse the entire map,
12701          * deleting entries, and the residence counts on objects
12702          * and sharing maps will go away then.
12703          */
12704 #endif
12705
12706         vm_map_destroy(map, VM_MAP_NO_FLAGS);
12707 }
12708
12709
12710 void
12711 vm_map_disable_NX(vm_map_t map)
12712 {
12713         if (map == NULL)
12714                 return;
12715         if (map->pmap == NULL)
12716                 return;
12717
12718         pmap_disable_NX(map->pmap);
12719 }
12720
12721 void
12722 vm_map_disallow_data_exec(vm_map_t map)
12723 {
12724     if (map == NULL)
12725         return;
12726
12727     map->map_disallow_data_exec = TRUE;
12728 }
12729
12730 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
12731  * more descriptive.
12732  */
12733 void
12734 vm_map_set_32bit(vm_map_t map)
12735 {
12736         map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
12737 }
12738
12739
12740 void
12741 vm_map_set_64bit(vm_map_t map)
12742 {
12743         map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
12744 }
12745
12746 vm_map_offset_t
12747 vm_compute_max_offset(unsigned is64)
12748 {
12749         return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
12750 }
12751
12752 boolean_t
12753 vm_map_is_64bit(
12754                 vm_map_t map)
12755 {
12756         return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
12757 }
12758
12759 boolean_t
12760 vm_map_has_4GB_pagezero(
12761                 vm_map_t map)
12762 {
12763         /*
12764          * XXX FBDP
12765          * We should lock the VM map (for read) here but we can get away
12766          * with it for now because there can't really be any race condition:
12767          * the VM map's min_offset is changed only when the VM map is created
12768          * and when the zero page is established (when the binary gets loaded),
12769          * and this routine gets called only when the task terminates and the
12770          * VM map is being torn down, and when a new map is created via
12771          * load_machfile()/execve().
12772          */
12773         return (map->min_offset >= 0x100000000ULL);
12774 }
12775
12776 void
12777 vm_map_set_4GB_pagezero(vm_map_t map)
12778 {
12779 #if defined(__i386__)
12780         pmap_set_4GB_pagezero(map->pmap);
12781 #else
12782 #pragma unused(map)
12783 #endif
12784
12785 }
12786
12787 void
12788 vm_map_clear_4GB_pagezero(vm_map_t map)
12789 {
12790 #if defined(__i386__)
12791         pmap_clear_4GB_pagezero(map->pmap);
12792 #else
12793 #pragma unused(map)
12794 #endif
12795 }
12796
12797 /*
12798  * Raise a VM map's minimum offset.
12799  * To strictly enforce "page zero" reservation.
12800  */
12801 kern_return_t
12802 vm_map_raise_min_offset(
12803         vm_map_t        map,
12804         vm_map_offset_t new_min_offset)
12805 {
12806         vm_map_entry_t  first_entry;
12807
12808         new_min_offset = vm_map_round_page(new_min_offset);
12809
12810         vm_map_lock(map);
12811
12812         if (new_min_offset < map->min_offset) {
12813                 /*
12814                  * Can't move min_offset backwards, as that would expose
12815                  * a part of the address space that was previously, and for
12816                  * possibly good reasons, inaccessible.
12817                  */
12818                 vm_map_unlock(map);
12819                 return KERN_INVALID_ADDRESS;
12820         }
12821
12822         first_entry = vm_map_first_entry(map);
12823         if (first_entry != vm_map_to_entry(map) &&
12824             first_entry->vme_start < new_min_offset) {
12825                 /*
12826                  * Some memory was already allocated below the new
12827                  * minimun offset.  It's too late to change it now...
12828                  */
12829                 vm_map_unlock(map);
12830                 return KERN_NO_SPACE;
12831         }
12832
12833         map->min_offset = new_min_offset;
12834
12835         vm_map_unlock(map);
12836
12837         return KERN_SUCCESS;
12838 }
12839
12840 /*
12841  * Set the limit on the maximum amount of user wired memory allowed for this map.
12842  * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
12843  * the kernel.  The limits are checked in the mach VM side, so we keep a copy so we
12844  * don't have to reach over to the BSD data structures.
12845  */
12846
12847 void
12848 vm_map_set_user_wire_limit(vm_map_t     map,
12849                            vm_size_t    limit)
12850 {
12851         map->user_wire_limit = limit;
12852 }
12853
12854
12855 void vm_map_switch_protect(vm_map_t     map,
12856                            boolean_t    val)
12857 {
12858         vm_map_lock(map);
12859         map->switch_protect=val;
12860         vm_map_unlock(map);
12861 }
12862
/* Add (generate) code signature for memory range */
#if CONFIG_DYNAMIC_CODE_SIGNING
/*
 *      vm_map_sign:
 *
 *      Marks every resident page backing [start, end) as code-signing
 *      validated.  The whole range must lie inside a single, non-submap
 *      map entry that already has a VM object.
 *
 *      Returns:
 *      KERN_SUCCESS            every page in the range was marked.
 *      KERN_INVALID_ARGUMENT   null map, range not covered by one entry,
 *                              or the entry has no object yet.
 *      KERN_INVALID_ADDRESS    start is not mapped, or maps a submap.
 *      KERN_FAILURE            a page is not resident, is busy, or is in
 *                              an error/restart/private/absent state.
 *                              Pages handled before the failing one stay
 *                              marked.
 */
kern_return_t vm_map_sign(vm_map_t map,
                 vm_map_offset_t start,
                 vm_map_offset_t end)
{
        vm_map_entry_t entry;
        vm_page_t m;
        vm_object_t object;
        vm_map_offset_t entry_start;
        vm_object_offset_t entry_offset;

        /*
         * Vet all the input parameters and current type and state of the
         * underlying object.  Return with an error if anything is amiss.
         */
        if (map == VM_MAP_NULL)
                return(KERN_INVALID_ARGUMENT);

        vm_map_lock_read(map);

        if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
                /*
                 * Must pass a valid non-submap address.
                 */
                vm_map_unlock_read(map);
                return(KERN_INVALID_ADDRESS);
        }

        if((entry->vme_start > start) || (entry->vme_end < end)) {
                /*
                 * Map entry doesn't cover the requested range. Not handling
                 * this situation currently.
                 */
                vm_map_unlock_read(map);
                return(KERN_INVALID_ARGUMENT);
        }

        object = entry->object.vm_object;
        if (object == VM_OBJECT_NULL) {
                /*
                 * Object must already be present or we can't sign.
                 */
                vm_map_unlock_read(map);
                return KERN_INVALID_ARGUMENT;
        }

        /*
         * Snapshot what we need from the entry while the map is still
         * locked: once the map lock is dropped the entry may be changed
         * or freed, so it must not be dereferenced in the loop below.
         */
        entry_start = entry->vme_start;
        entry_offset = entry->offset;

        /* take the object lock before letting go of the map */
        vm_object_lock(object);
        vm_map_unlock_read(map);

        while(start < end) {
                uint32_t refmod;

                m = vm_page_lookup(object, start - entry_start + entry_offset );
                if (m==VM_PAGE_NULL) {
                        /* should we try to fault a page here? we can probably
                         * demand it exists and is locked for this request */
                        vm_object_unlock(object);
                        return KERN_FAILURE;
                }
                /* deal with special page status */
                if (m->busy ||
                    (m->unusual && (m->error || m->restart || m->private || m->absent))) {
                        vm_object_unlock(object);
                        return KERN_FAILURE;
                }

                /* Page is OK... now "validate" it */
                /* This is the place where we'll call out to create a code
                 * directory, later */
                m->cs_validated = TRUE;

                /* The page is now "clean" for codesigning purposes. That means
                 * we don't consider it as modified (wpmapped) anymore. But
                 * we'll disconnect the page so we note any future modification
                 * attempts. */
                m->wpmapped = FALSE;
                refmod = pmap_disconnect(m->phys_page);

                /* Pull the dirty status from the pmap, since we cleared the
                 * wpmapped bit */
                if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
                        m->dirty = TRUE;
                }

                /* On to the next page */
                start += PAGE_SIZE;
        }
        vm_object_unlock(object);

        return KERN_SUCCESS;
}
#endif
12954
12955 #if CONFIG_FREEZE
12956
12957 kern_return_t vm_map_freeze_walk(
12958                 vm_map_t map,
12959                 unsigned int *purgeable_count,
12960                 unsigned int *wired_count,
12961                 unsigned int *clean_count,
12962                 unsigned int *dirty_count,
12963                 boolean_t *has_shared)
12964 {
12965         vm_map_entry_t entry;
12966
12967         vm_map_lock_read(map);
12968
12969         *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
12970         *has_shared = FALSE;
12971
12972         for (entry = vm_map_first_entry(map);
12973              entry != vm_map_to_entry(map);
12974              entry = entry->vme_next) {
12975                 unsigned int purgeable, clean, dirty, wired;
12976                 boolean_t shared;
12977
12978                 if ((entry->object.vm_object == 0) ||
12979                     (entry->is_sub_map) ||
12980                     (entry->object.vm_object->phys_contiguous)) {
12981                         continue;
12982                 }
12983
12984                 vm_object_pack(&purgeable, &wired, &clean, &dirty, &shared, entry->object.vm_object, VM_OBJECT_NULL, NULL, NULL);
12985
12986                 *purgeable_count += purgeable;
12987                 *wired_count += wired;
12988                 *clean_count += clean;
12989                 *dirty_count += dirty;
12990
12991                 if (shared) {
12992                         *has_shared = TRUE;
12993                 }
12994         }
12995
12996         vm_map_unlock_read(map);
12997
12998         return KERN_SUCCESS;
12999 }
13000
13001 kern_return_t vm_map_freeze(
13002                 vm_map_t map,
13003                 unsigned int *purgeable_count,
13004                 unsigned int *wired_count,
13005                 unsigned int *clean_count,
13006                 unsigned int *dirty_count,
13007                 boolean_t *has_shared)
13008 {
13009         vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
13010         vm_object_t compact_object = VM_OBJECT_NULL;
13011         vm_object_offset_t offset = 0x0;
13012         kern_return_t kr = KERN_SUCCESS;
13013         void *default_freezer_toc = NULL;
13014         boolean_t cleanup = FALSE;
13015
13016         *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
13017         *has_shared = FALSE;
13018
13019         /* Create our compact object */
13020         compact_object = vm_object_allocate((vm_map_offset_t)(VM_MAX_ADDRESS) - (vm_map_offset_t)(VM_MIN_ADDRESS));
13021         if (!compact_object) {
13022                 kr = KERN_FAILURE;
13023                 goto done;
13024         }
13025
13026         default_freezer_toc = default_freezer_mapping_create(compact_object, offset);
13027         if (!default_freezer_toc) {
13028                 kr = KERN_FAILURE;
13029                 goto done;
13030         }
13031
13032         /*
13033          * We need the exclusive lock here so that we can
13034          * block any page faults or lookups while we are
13035          * in the middle of freezing this vm map.
13036          */
13037         vm_map_lock(map);
13038
13039         if (map->default_freezer_toc != NULL){
13040                 /*
13041                  * This map has already been frozen.
13042                  */
13043                 cleanup = TRUE;
13044                 kr = KERN_SUCCESS;
13045                 goto done;
13046         }
13047
13048         /* Get a mapping in place for the freezing about to commence */
13049         map->default_freezer_toc = default_freezer_toc;
13050
13051         vm_object_lock(compact_object);
13052
13053         for (entry2 = vm_map_first_entry(map);
13054              entry2 != vm_map_to_entry(map);
13055              entry2 = entry2->vme_next) {
13056
13057                 vm_object_t     src_object = entry2->object.vm_object;
13058
13059                 /* If eligible, scan the entry, moving eligible pages over to our parent object */
13060                 if (entry2->object.vm_object && !entry2->is_sub_map && !entry2->object.vm_object->phys_contiguous) {
13061                         unsigned int purgeable, clean, dirty, wired;
13062                         boolean_t shared;
13063
13064                         vm_object_pack(&purgeable, &wired, &clean, &dirty, &shared,
13065                                                         src_object, compact_object, &default_freezer_toc, &offset);
13066
13067                         *purgeable_count += purgeable;
13068                         *wired_count += wired;
13069                         *clean_count += clean;
13070                         *dirty_count += dirty;
13071
13072                         if (shared) {
13073                                 *has_shared = TRUE;
13074                         }
13075                 }
13076         }
13077
13078         vm_object_unlock(compact_object);
13079
13080         /* Finally, throw out the pages to swap */
13081         vm_object_pageout(compact_object);
13082
13083 done:
13084         vm_map_unlock(map);
13085
13086         /* Unwind if there was a failure */
13087         if ((cleanup) || (KERN_SUCCESS != kr)) {
13088                 if (default_freezer_toc){
13089                         default_freezer_mapping_free(&map->default_freezer_toc, TRUE);
13090                 }
13091                 if (compact_object){
13092                         vm_object_deallocate(compact_object);
13093                 }
13094         }
13095
13096         return kr;
13097 }
13098
13099 __private_extern__ vm_object_t  default_freezer_get_compact_vm_object( void** );
13100
13101 void
13102 vm_map_thaw(
13103         vm_map_t map)
13104 {
13105         void **default_freezer_toc;
13106         vm_object_t compact_object;
13107
13108         vm_map_lock(map);
13109
13110         if (map->default_freezer_toc == NULL){
13111                 /*
13112                  * This map is not in a frozen state.
13113                  */
13114                 goto out;
13115         }
13116
13117         default_freezer_toc = &(map->default_freezer_toc);
13118
13119         compact_object = default_freezer_get_compact_vm_object(default_freezer_toc);
13120
13121         /* Bring the pages back in */
13122         vm_object_pagein(compact_object);
13123
13124         /* Shift pages back to their original objects */
13125         vm_object_unpack(compact_object, default_freezer_toc);
13126
13127         vm_object_deallocate(compact_object);
13128
13129         map->default_freezer_toc = NULL;
13130
13131 out:
13132         vm_map_unlock(map);
13133 }
13134 #endif