apple/xnu (xnu-1699.24.8): osfmk/vm/vm_map.c
1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66 #include <task_swapper.h>
67 #include <mach_assert.h>
68 #include <libkern/OSAtomic.h>
69
70 #include <mach/kern_return.h>
71 #include <mach/port.h>
72 #include <mach/vm_attributes.h>
73 #include <mach/vm_param.h>
74 #include <mach/vm_behavior.h>
75 #include <mach/vm_statistics.h>
76 #include <mach/memory_object.h>
77 #include <mach/mach_vm.h>
78 #include <machine/cpu_capabilities.h>
79 #include <mach/sdt.h>
80
81 #include <kern/assert.h>
82 #include <kern/counters.h>
83 #include <kern/kalloc.h>
84 #include <kern/zalloc.h>
85
86 #include <vm/cpm.h>
87 #include <vm/vm_init.h>
88 #include <vm/vm_fault.h>
89 #include <vm/vm_map.h>
90 #include <vm/vm_object.h>
91 #include <vm/vm_page.h>
92 #include <vm/vm_pageout.h>
93 #include <vm/vm_kern.h>
94 #include <ipc/ipc_port.h>
95 #include <kern/sched_prim.h>
96 #include <kern/misc_protos.h>
97 #include <machine/db_machdep.h>
98 #include <kern/xpr.h>
99
100 #include <mach/vm_map_server.h>
101 #include <mach/mach_host_server.h>
102 #include <vm/vm_protos.h>
103 #include <vm/vm_purgeable_internal.h>
104
105 #include <vm/vm_protos.h>
106 #include <vm/vm_shared_region.h>
107 #include <vm/vm_map_store.h>
108
109 /* Internal prototypes
110 */
111
112 static void vm_map_simplify_range(
113 vm_map_t map,
114 vm_map_offset_t start,
115 vm_map_offset_t end); /* forward */
116
117 static boolean_t vm_map_range_check(
118 vm_map_t map,
119 vm_map_offset_t start,
120 vm_map_offset_t end,
121 vm_map_entry_t *entry);
122
123 static vm_map_entry_t _vm_map_entry_create(
124 struct vm_map_header *map_header);
125
126 static void _vm_map_entry_dispose(
127 struct vm_map_header *map_header,
128 vm_map_entry_t entry);
129
130 static void vm_map_pmap_enter(
131 vm_map_t map,
132 vm_map_offset_t addr,
133 vm_map_offset_t end_addr,
134 vm_object_t object,
135 vm_object_offset_t offset,
136 vm_prot_t protection);
137
138 static void _vm_map_clip_end(
139 struct vm_map_header *map_header,
140 vm_map_entry_t entry,
141 vm_map_offset_t end);
142
143 static void _vm_map_clip_start(
144 struct vm_map_header *map_header,
145 vm_map_entry_t entry,
146 vm_map_offset_t start);
147
148 static void vm_map_entry_delete(
149 vm_map_t map,
150 vm_map_entry_t entry);
151
152 static kern_return_t vm_map_delete(
153 vm_map_t map,
154 vm_map_offset_t start,
155 vm_map_offset_t end,
156 int flags,
157 vm_map_t zap_map);
158
159 static kern_return_t vm_map_copy_overwrite_unaligned(
160 vm_map_t dst_map,
161 vm_map_entry_t entry,
162 vm_map_copy_t copy,
163 vm_map_address_t start);
164
165 static kern_return_t vm_map_copy_overwrite_aligned(
166 vm_map_t dst_map,
167 vm_map_entry_t tmp_entry,
168 vm_map_copy_t copy,
169 vm_map_offset_t start,
170 pmap_t pmap);
171
172 static kern_return_t vm_map_copyin_kernel_buffer(
173 vm_map_t src_map,
174 vm_map_address_t src_addr,
175 vm_map_size_t len,
176 boolean_t src_destroy,
177 vm_map_copy_t *copy_result); /* OUT */
178
179 static kern_return_t vm_map_copyout_kernel_buffer(
180 vm_map_t map,
181 vm_map_address_t *addr, /* IN/OUT */
182 vm_map_copy_t copy,
183 boolean_t overwrite);
184
185 static void vm_map_fork_share(
186 vm_map_t old_map,
187 vm_map_entry_t old_entry,
188 vm_map_t new_map);
189
190 static boolean_t vm_map_fork_copy(
191 vm_map_t old_map,
192 vm_map_entry_t *old_entry_p,
193 vm_map_t new_map);
194
195 void vm_map_region_top_walk(
196 vm_map_entry_t entry,
197 vm_region_top_info_t top);
198
199 void vm_map_region_walk(
200 vm_map_t map,
201 vm_map_offset_t va,
202 vm_map_entry_t entry,
203 vm_object_offset_t offset,
204 vm_object_size_t range,
205 vm_region_extended_info_t extended,
206 boolean_t look_for_pages);
207
208 static kern_return_t vm_map_wire_nested(
209 vm_map_t map,
210 vm_map_offset_t start,
211 vm_map_offset_t end,
212 vm_prot_t access_type,
213 boolean_t user_wire,
214 pmap_t map_pmap,
215 vm_map_offset_t pmap_addr);
216
217 static kern_return_t vm_map_unwire_nested(
218 vm_map_t map,
219 vm_map_offset_t start,
220 vm_map_offset_t end,
221 boolean_t user_wire,
222 pmap_t map_pmap,
223 vm_map_offset_t pmap_addr);
224
225 static kern_return_t vm_map_overwrite_submap_recurse(
226 vm_map_t dst_map,
227 vm_map_offset_t dst_addr,
228 vm_map_size_t dst_size);
229
230 static kern_return_t vm_map_copy_overwrite_nested(
231 vm_map_t dst_map,
232 vm_map_offset_t dst_addr,
233 vm_map_copy_t copy,
234 boolean_t interruptible,
235 pmap_t pmap,
236 boolean_t discard_on_success);
237
238 static kern_return_t vm_map_remap_extract(
239 vm_map_t map,
240 vm_map_offset_t addr,
241 vm_map_size_t size,
242 boolean_t copy,
243 struct vm_map_header *map_header,
244 vm_prot_t *cur_protection,
245 vm_prot_t *max_protection,
246 vm_inherit_t inheritance,
247 boolean_t pageable);
248
249 static kern_return_t vm_map_remap_range_allocate(
250 vm_map_t map,
251 vm_map_address_t *address,
252 vm_map_size_t size,
253 vm_map_offset_t mask,
254 int flags,
255 vm_map_entry_t *map_entry);
256
257 static void vm_map_region_look_for_page(
258 vm_map_t map,
259 vm_map_offset_t va,
260 vm_object_t object,
261 vm_object_offset_t offset,
262 int max_refcnt,
263 int depth,
264 vm_region_extended_info_t extended);
265
266 static int vm_map_region_count_obj_refs(
267 vm_map_entry_t entry,
268 vm_object_t object);
269
270
271 static kern_return_t vm_map_willneed(
272 vm_map_t map,
273 vm_map_offset_t start,
274 vm_map_offset_t end);
275
276 static kern_return_t vm_map_reuse_pages(
277 vm_map_t map,
278 vm_map_offset_t start,
279 vm_map_offset_t end);
280
281 static kern_return_t vm_map_reusable_pages(
282 vm_map_t map,
283 vm_map_offset_t start,
284 vm_map_offset_t end);
285
286 static kern_return_t vm_map_can_reuse(
287 vm_map_t map,
288 vm_map_offset_t start,
289 vm_map_offset_t end);
290
291 #if CONFIG_FREEZE
292 struct default_freezer_table;
293 __private_extern__ void* default_freezer_mapping_create(vm_object_t, vm_offset_t);
294 __private_extern__ void default_freezer_mapping_free(void**, boolean_t all);
295 #endif
296
297 /*
298 * Macros to copy a vm_map_entry. We must be careful to correctly
299 * manage the wired page count. vm_map_entry_copy() creates a new
300 * map entry that refers to the same memory - the wired count in the new entry
301 * must be set to zero. vm_map_entry_copy_full() creates a new
302 * entry that is identical to the old entry. This preserves the
303 * wire count; it's used for map splitting and zone changing in
304 * vm_map_copyout.
305 */
306 #define vm_map_entry_copy(NEW,OLD) \
307 MACRO_BEGIN \
308 *(NEW) = *(OLD); \
309 (NEW)->is_shared = FALSE; \
310 (NEW)->needs_wakeup = FALSE; \
311 (NEW)->in_transition = FALSE; \
312 (NEW)->wired_count = 0; \
313 (NEW)->user_wired_count = 0; \
314 (NEW)->permanent = FALSE; \
315 MACRO_END
316
317 #define vm_map_entry_copy_full(NEW,OLD) (*(NEW) = *(OLD))
318
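/*
 * Illustrative sketch (compiled out): which copy macro to use depends on
 * whether the duplicate should inherit the original's wired counts.  The
 * helper below is hypothetical and only demonstrates the distinction
 * described in the comment above.
 */
#if 0 /* example only */
static void
example_clone_entry(vm_map_entry_t new_entry, vm_map_entry_t old_entry, boolean_t full)
{
	if (full)
		vm_map_entry_copy_full(new_entry, old_entry);	/* identical clone, wire counts preserved */
	else
		vm_map_entry_copy(new_entry, old_entry);	/* new mapping of same memory, wire counts zeroed */
}
#endif
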
319 /*
320 * Decide if we want to allow processes to execute from their data or stack areas.
321 * override_nx() returns true if we do. Data/stack execution can be enabled independently
322 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
323 * or allow_stack_exec to enable data execution for that type of data area for that particular
324 * ABI (or both by or'ing the flags together). These are initialized in the architecture
325 * specific pmap files since the default behavior varies according to architecture. The
326 * main reason it varies is because of the need to provide binary compatibility with old
327 * applications that were written before these restrictions came into being. In the old
328 * days, an app could execute anything it could read, but this has slowly been tightened
329 * up over time. The default behavior is:
330 *
331 * 32-bit PPC apps may execute from both stack and data areas
332 * 32-bit Intel apps may execute from data areas but not stack
333 * 64-bit PPC/Intel apps may not execute from either data or stack
334 *
335 * An application on any architecture may override these defaults by explicitly
336 * adding PROT_EXEC permission to the page in question with the mprotect(2)
337 * system call. This code here just determines what happens when an app tries to
338 * execute from a page that lacks execute permission.
339 *
340 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
341 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
342 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
343 * execution from data areas for a particular binary even if the arch normally permits it. As
344 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
345 * to support some complicated use cases, notably browsers with out-of-process plugins that
346 * are not all NX-safe.
347 */
348
349 extern int allow_data_exec, allow_stack_exec;
350
351 int
352 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
353 {
354 int current_abi;
355
356 /*
357 * Determine if the app is running in 32 or 64 bit mode.
358 */
359
360 if (vm_map_is_64bit(map))
361 current_abi = VM_ABI_64;
362 else
363 current_abi = VM_ABI_32;
364
365 /*
366 * Determine if we should allow the execution based on whether it's a
367 * stack or data area and the current architecture.
368 */
369
370 if (user_tag == VM_MEMORY_STACK)
371 return allow_stack_exec & current_abi;
372
373 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
374 }
375
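/*
 * Illustrative sketch (compiled out): a fault path that has already looked
 * up the faulting entry might consult override_nx() roughly as below when
 * an execute fault hits a non-executable mapping.  The helper and its
 * parameter names are hypothetical; the real policy check lives in the
 * fault-handling code, not in this file.
 */
#if 0 /* example only */
static boolean_t
example_allow_exec_fault(vm_map_t map, vm_map_entry_t entry, vm_prot_t fault_prot)
{
	if ((fault_prot & VM_PROT_EXECUTE) &&
	    !(entry->protection & VM_PROT_EXECUTE)) {
		/* execute fault on a non-executable entry: apply the NX policy */
		return override_nx(map, entry->alias) ? TRUE : FALSE;
	}
	return TRUE;
}
#endif
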
376
377 /*
378 * Virtual memory maps provide for the mapping, protection,
379 * and sharing of virtual memory objects. In addition,
380 * this module provides for an efficient virtual copy of
381 * memory from one map to another.
382 *
383 * Synchronization is required prior to most operations.
384 *
385 * Maps consist of an ordered doubly-linked list of simple
386 * entries; a single hint is used to speed up lookups.
387 *
388 * Sharing maps have been deleted from this version of Mach.
389 * All shared objects are now mapped directly into the respective
390 * maps. This requires a change in the copy on write strategy;
391 * the asymmetric (delayed) strategy is used for shared temporary
392 * objects instead of the symmetric (shadow) strategy. All maps
393 * are now "top level" maps (either task map, kernel map or submap
394 * of the kernel map).
395 *
396 * Since portions of maps are specified by start/end addresses,
397 * which may not align with existing map entries, all
398 * routines merely "clip" entries to these start/end values.
399 * [That is, an entry is split into two, bordering at a
400 * start or end value.] Note that these clippings may not
401 * always be necessary (as the two resulting entries are then
402 * not changed); however, the clipping is done for convenience.
403 * No attempt is currently made to "glue back together" two
404 * abutting entries.
405 *
406 * The symmetric (shadow) copy strategy implements virtual copy
407 * by copying VM object references from one map to
408 * another, and then marking both regions as copy-on-write.
409 * It is important to note that only one writeable reference
410 * to a VM object region exists in any map when this strategy
411 * is used -- this means that shadow object creation can be
412 * delayed until a write operation occurs. The asymmetric (delayed)
413 * strategy allows multiple maps to have writeable references to
414 * the same region of a vm object, and hence cannot delay creating
415 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
416 * Copying of permanent objects is completely different; see
417 * vm_object_copy_strategically() in vm_object.c.
418 */
419
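/*
 * Illustrative sketch (compiled out): entries hang off the map header as an
 * ordered doubly-linked list, so a walk simply follows vme_next until it
 * returns to the header sentinel.  (The header also keeps nentries, so the
 * count below is purely for illustration.)
 */
#if 0 /* example only */
static int
example_count_entries(vm_map_t map)
{
	vm_map_entry_t entry;
	int count = 0;

	vm_map_lock_read(map);
	for (entry = vm_map_first_entry(map);
	     entry != vm_map_to_entry(map);
	     entry = entry->vme_next)
		count++;
	vm_map_unlock_read(map);
	return count;
}
#endif
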
420 static zone_t vm_map_zone; /* zone for vm_map structures */
421 static zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
422 static zone_t vm_map_kentry_zone; /* zone for kernel entry structures */
423 static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
424
425
426 /*
427 * Placeholder object for submap operations. This object is dropped
428 * into the range by a call to vm_map_find, and removed when
429 * vm_map_submap creates the submap.
430 */
431
432 vm_object_t vm_submap_object;
433
434 static void *map_data;
435 static vm_size_t map_data_size;
436 static void *kentry_data;
437 static vm_size_t kentry_data_size;
438 static int kentry_count = 2048; /* to init kentry_data_size */
439
440 #if CONFIG_EMBEDDED
441 #define NO_COALESCE_LIMIT 0
442 #else
443 #define NO_COALESCE_LIMIT ((1024 * 128) - 1)
444 #endif
445
446 /* Skip acquiring locks if we're in the midst of a kernel core dump */
447 unsigned int not_in_kdp = 1;
448
449 unsigned int vm_map_set_cache_attr_count = 0;
450
451 kern_return_t
452 vm_map_set_cache_attr(
453 vm_map_t map,
454 vm_map_offset_t va)
455 {
456 vm_map_entry_t map_entry;
457 vm_object_t object;
458 kern_return_t kr = KERN_SUCCESS;
459
460 vm_map_lock_read(map);
461
462 if (!vm_map_lookup_entry(map, va, &map_entry) ||
463 map_entry->is_sub_map) {
464 /*
465 * that memory is not properly mapped
466 */
467 kr = KERN_INVALID_ARGUMENT;
468 goto done;
469 }
470 object = map_entry->object.vm_object;
471
472 if (object == VM_OBJECT_NULL) {
473 /*
474 * there should be a VM object here at this point
475 */
476 kr = KERN_INVALID_ARGUMENT;
477 goto done;
478 }
479 vm_object_lock(object);
480 object->set_cache_attr = TRUE;
481 vm_object_unlock(object);
482
483 vm_map_set_cache_attr_count++;
484 done:
485 vm_map_unlock_read(map);
486
487 return kr;
488 }
489
490
491 #if CONFIG_CODE_DECRYPTION
492 /*
493 * vm_map_apple_protected:
494 * This remaps the requested part of the object with an object backed by
495 * the decrypting pager.
496 * crypt_info contains entry points and session data for the crypt module.
497 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
498 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
499 */
500 kern_return_t
501 vm_map_apple_protected(
502 vm_map_t map,
503 vm_map_offset_t start,
504 vm_map_offset_t end,
505 struct pager_crypt_info *crypt_info)
506 {
507 boolean_t map_locked;
508 kern_return_t kr;
509 vm_map_entry_t map_entry;
510 memory_object_t protected_mem_obj;
511 vm_object_t protected_object;
512 vm_map_offset_t map_addr;
513
514 vm_map_lock_read(map);
515 map_locked = TRUE;
516
517 /* lookup the protected VM object */
518 if (!vm_map_lookup_entry(map,
519 start,
520 &map_entry) ||
521 map_entry->vme_end < end ||
522 map_entry->is_sub_map) {
523 /* that memory is not properly mapped */
524 kr = KERN_INVALID_ARGUMENT;
525 goto done;
526 }
527 protected_object = map_entry->object.vm_object;
528 if (protected_object == VM_OBJECT_NULL) {
529 /* there should be a VM object here at this point */
530 kr = KERN_INVALID_ARGUMENT;
531 goto done;
532 }
533
534 /* make sure protected object stays alive while map is unlocked */
535 vm_object_reference(protected_object);
536
537 vm_map_unlock_read(map);
538 map_locked = FALSE;
539
540 /*
541 * Lookup (and create if necessary) the protected memory object
542 * matching that VM object.
543 * If successful, this also grabs a reference on the memory object,
544 * to guarantee that it doesn't go away before we get a chance to map
545 * it.
546 */
547 protected_mem_obj = apple_protect_pager_setup(protected_object, crypt_info);
548
549 /* release extra ref on protected object */
550 vm_object_deallocate(protected_object);
551
552 if (protected_mem_obj == NULL) {
553 kr = KERN_FAILURE;
554 goto done;
555 }
556
557 /* map this memory object in place of the current one */
558 map_addr = start;
559 kr = vm_map_enter_mem_object(map,
560 &map_addr,
561 end - start,
562 (mach_vm_offset_t) 0,
563 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
564 (ipc_port_t) protected_mem_obj,
565 (map_entry->offset +
566 (start - map_entry->vme_start)),
567 TRUE,
568 map_entry->protection,
569 map_entry->max_protection,
570 map_entry->inheritance);
571 assert(map_addr == start);
572 /*
573 * Release the reference obtained by apple_protect_pager_setup().
574 * The mapping (if it succeeded) is now holding a reference on the
575 * memory object.
576 */
577 memory_object_deallocate(protected_mem_obj);
578
579 done:
580 if (map_locked) {
581 vm_map_unlock_read(map);
582 }
583 return kr;
584 }
585 #endif /* CONFIG_CODE_DECRYPTION */
586
587
588 lck_grp_t vm_map_lck_grp;
589 lck_grp_attr_t vm_map_lck_grp_attr;
590 lck_attr_t vm_map_lck_attr;
591
592
593 /*
594 * vm_map_init:
595 *
596 * Initialize the vm_map module. Must be called before
597 * any other vm_map routines.
598 *
599 * Map and entry structures are allocated from zones -- we must
600 * initialize those zones.
601 *
602 * There are three zones of interest:
603 *
604 * vm_map_zone: used to allocate maps.
605 * vm_map_entry_zone: used to allocate map entries.
606 * vm_map_kentry_zone: used to allocate map entries for the kernel.
607 *
608 * The kernel allocates map entries from a special zone that is initially
609 * "crammed" with memory. It would be difficult (perhaps impossible) for
610 * the kernel to allocate more memory to an entry zone when it became
611 * empty since the very act of allocating memory implies the creation
612 * of a new entry.
613 */
614 void
615 vm_map_init(
616 void)
617 {
618 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
619 PAGE_SIZE, "maps");
620 zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
621
622 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
623 1024*1024, PAGE_SIZE*5,
624 "non-kernel map entries");
625 zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
626
627 vm_map_kentry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
628 kentry_data_size, kentry_data_size,
629 "kernel map entries");
630 zone_change(vm_map_kentry_zone, Z_NOENCRYPT, TRUE);
631
632 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
633 16*1024, PAGE_SIZE, "map copies");
634 zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
635
636 /*
637 * Cram the map and kentry zones with initial data.
638 * Set kentry_zone non-collectible to aid zone_gc().
639 */
640 zone_change(vm_map_zone, Z_COLLECT, FALSE);
641 zone_change(vm_map_kentry_zone, Z_COLLECT, FALSE);
642 zone_change(vm_map_kentry_zone, Z_EXPAND, FALSE);
643 zone_change(vm_map_kentry_zone, Z_FOREIGN, TRUE);
644 zone_change(vm_map_kentry_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
645 zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
646
647 zcram(vm_map_zone, map_data, map_data_size);
648 zcram(vm_map_kentry_zone, kentry_data, kentry_data_size);
649
650 lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
651 lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
652 lck_attr_setdefault(&vm_map_lck_attr);
653 }
654
655 void
656 vm_map_steal_memory(
657 void)
658 {
659 map_data_size = round_page(10 * sizeof(struct _vm_map));
660 map_data = pmap_steal_memory(map_data_size);
661
662 #if 0
663 /*
664 * Limiting worst case: vm_map_kentry_zone needs to map each "available"
665 * physical page (i.e. that beyond the kernel image and page tables)
666 * individually; we guess at most one entry per eight pages in the
667 * real world. This works out to roughly .1 of 1% of physical memory,
668 * or roughly 1900 entries (64K) for a 64M machine with 4K pages.
669 */
670 #endif
671 kentry_count = pmap_free_pages() / 8;
672
673
674 kentry_data_size =
675 round_page(kentry_count * sizeof(struct vm_map_entry));
676 kentry_data = pmap_steal_memory(kentry_data_size);
677 }
678
679 /*
680 * vm_map_create:
681 *
682 * Creates and returns a new empty VM map with
683 * the given physical map structure, and having
684 * the given lower and upper address bounds.
685 */
686 vm_map_t
687 vm_map_create(
688 pmap_t pmap,
689 vm_map_offset_t min,
690 vm_map_offset_t max,
691 boolean_t pageable)
692 {
693 static int color_seed = 0;
694 register vm_map_t result;
695
696 result = (vm_map_t) zalloc(vm_map_zone);
697 if (result == VM_MAP_NULL)
698 panic("vm_map_create");
699
700 vm_map_first_entry(result) = vm_map_to_entry(result);
701 vm_map_last_entry(result) = vm_map_to_entry(result);
702 result->hdr.nentries = 0;
703 result->hdr.entries_pageable = pageable;
704
705 vm_map_store_init( &(result->hdr) );
706
707 result->size = 0;
708 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
709 result->user_wire_size = 0;
710 result->ref_count = 1;
711 #if TASK_SWAPPER
712 result->res_count = 1;
713 result->sw_state = MAP_SW_IN;
714 #endif /* TASK_SWAPPER */
715 result->pmap = pmap;
716 result->min_offset = min;
717 result->max_offset = max;
718 result->wiring_required = FALSE;
719 result->no_zero_fill = FALSE;
720 result->mapped = FALSE;
721 result->wait_for_space = FALSE;
722 result->switch_protect = FALSE;
723 result->disable_vmentry_reuse = FALSE;
724 result->map_disallow_data_exec = FALSE;
725 result->highest_entry_end = 0;
726 result->first_free = vm_map_to_entry(result);
727 result->hint = vm_map_to_entry(result);
728 result->color_rr = (color_seed++) & vm_color_mask;
729 result->jit_entry_exists = FALSE;
730 #if CONFIG_FREEZE
731 result->default_freezer_toc = NULL;
732 #endif
733 vm_map_lock_init(result);
734 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
735
736 return(result);
737 }
738
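/*
 * Illustrative sketch (compiled out): vm_map_enter() below builds its
 * temporary "zap" maps this way -- a map with no pmap behind it that only
 * holds entries torn out of another map.  The bounds here are hypothetical.
 */
#if 0 /* example only */
static vm_map_t
example_make_zap_map(vm_map_offset_t start, vm_map_size_t size, boolean_t pageable)
{
	return vm_map_create(PMAP_NULL, start, start + size, pageable);
}
#endif
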
739 /*
740 * vm_map_entry_create: [ internal use only ]
741 *
742 * Allocates a VM map entry for insertion in the
743 * given map (or map copy). No fields are filled.
744 */
745 #define vm_map_entry_create(map) \
746 _vm_map_entry_create(&(map)->hdr)
747
748 #define vm_map_copy_entry_create(copy) \
749 _vm_map_entry_create(&(copy)->cpy_hdr)
750
751 static vm_map_entry_t
752 _vm_map_entry_create(
753 register struct vm_map_header *map_header)
754 {
755 register zone_t zone;
756 register vm_map_entry_t entry;
757
758 if (map_header->entries_pageable)
759 zone = vm_map_entry_zone;
760 else
761 zone = vm_map_kentry_zone;
762
763 entry = (vm_map_entry_t) zalloc(zone);
764 if (entry == VM_MAP_ENTRY_NULL)
765 panic("vm_map_entry_create");
766 vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
767
768 return(entry);
769 }
770
771 /*
772 * vm_map_entry_dispose: [ internal use only ]
773 *
774 * Inverse of vm_map_entry_create.
775 *
776 * write map lock held so no need to
777 * do anything special to insure correctness
778 * of the stores
779 */
780 #define vm_map_entry_dispose(map, entry) \
781 vm_map_store_update( map, entry, VM_MAP_ENTRY_DELETE); \
782 _vm_map_entry_dispose(&(map)->hdr, (entry))
783
784 #define vm_map_copy_entry_dispose(copy, entry) \
785 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
786
787 static void
788 _vm_map_entry_dispose(
789 register struct vm_map_header *map_header,
790 register vm_map_entry_t entry)
791 {
792 register zone_t zone;
793
794 if (map_header->entries_pageable)
795 zone = vm_map_entry_zone;
796 else
797 zone = vm_map_kentry_zone;
798
799 zfree(zone, entry);
800 }
801
802 #if MACH_ASSERT
803 static boolean_t first_free_check = FALSE;
804 boolean_t
805 first_free_is_valid(
806 vm_map_t map)
807 {
808 if (!first_free_check)
809 return TRUE;
810
811 return( first_free_is_valid_store( map ));
812 }
813 #endif /* MACH_ASSERT */
814
815
816 #define vm_map_copy_entry_link(copy, after_where, entry) \
817 _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
818
819 #define vm_map_copy_entry_unlink(copy, entry) \
820 _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
821
822 #if MACH_ASSERT && TASK_SWAPPER
823 /*
824 * vm_map_res_reference:
825 *
826 * Adds another valid residence count to the given map.
827 *
828 * Map is locked so this function can be called from
829 * vm_map_swapin.
830 *
831 */
832 void vm_map_res_reference(register vm_map_t map)
833 {
834 /* assert map is locked */
835 assert(map->res_count >= 0);
836 assert(map->ref_count >= map->res_count);
837 if (map->res_count == 0) {
838 lck_mtx_unlock(&map->s_lock);
839 vm_map_lock(map);
840 vm_map_swapin(map);
841 lck_mtx_lock(&map->s_lock);
842 ++map->res_count;
843 vm_map_unlock(map);
844 } else
845 ++map->res_count;
846 }
847
848 /*
849 * vm_map_reference_swap:
850 *
851 * Adds valid reference and residence counts to the given map.
852 *
853 * The map may not be in memory (i.e. zero residence count).
854 *
855 */
856 void vm_map_reference_swap(register vm_map_t map)
857 {
858 assert(map != VM_MAP_NULL);
859 lck_mtx_lock(&map->s_lock);
860 assert(map->res_count >= 0);
861 assert(map->ref_count >= map->res_count);
862 map->ref_count++;
863 vm_map_res_reference(map);
864 lck_mtx_unlock(&map->s_lock);
865 }
866
867 /*
868 * vm_map_res_deallocate:
869 *
870 * Decrement residence count on a map; possibly causing swapout.
871 *
872 * The map must be in memory (i.e. non-zero residence count).
873 *
874 * The map is locked, so this function is callable from vm_map_deallocate.
875 *
876 */
877 void vm_map_res_deallocate(register vm_map_t map)
878 {
879 assert(map->res_count > 0);
880 if (--map->res_count == 0) {
881 lck_mtx_unlock(&map->s_lock);
882 vm_map_lock(map);
883 vm_map_swapout(map);
884 vm_map_unlock(map);
885 lck_mtx_lock(&map->s_lock);
886 }
887 assert(map->ref_count >= map->res_count);
888 }
889 #endif /* MACH_ASSERT && TASK_SWAPPER */
890
891 /*
892 * vm_map_destroy:
893 *
894 * Actually destroy a map.
895 */
896 void
897 vm_map_destroy(
898 vm_map_t map,
899 int flags)
900 {
901 vm_map_lock(map);
902
903 /* clean up regular map entries */
904 (void) vm_map_delete(map, map->min_offset, map->max_offset,
905 flags, VM_MAP_NULL);
906 /* clean up leftover special mappings (commpage, etc...) */
907 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
908 flags, VM_MAP_NULL);
909
910 #if CONFIG_FREEZE
911 if (map->default_freezer_toc){
912 default_freezer_mapping_free( &(map->default_freezer_toc), TRUE);
913 }
914 #endif
915 vm_map_unlock(map);
916
917 assert(map->hdr.nentries == 0);
918
919 if(map->pmap)
920 pmap_destroy(map->pmap);
921
922 zfree(vm_map_zone, map);
923 }
924
925 #if TASK_SWAPPER
926 /*
927 * vm_map_swapin/vm_map_swapout
928 *
929 * Swap a map in and out, either referencing or releasing its resources.
930 * These functions are internal use only; however, they must be exported
931 * because they may be called from macros, which are exported.
932 *
933 * In the case of swapout, there could be races on the residence count,
934 * so if the residence count is up, we return, assuming that a
935 * vm_map_deallocate() call in the near future will bring us back.
936 *
937 * Locking:
938 * -- We use the map write lock for synchronization among races.
939 * -- The map write lock, and not the simple s_lock, protects the
940 * swap state of the map.
941 * -- If a map entry is a share map, then we hold both locks, in
942 * hierarchical order.
943 *
944 * Synchronization Notes:
945 * 1) If a vm_map_swapin() call happens while swapout in progress, it
946 * will block on the map lock and proceed when swapout is through.
947 * 2) A vm_map_reference() call at this time is illegal, and will
948 * cause a panic. vm_map_reference() is only allowed on resident
949 * maps, since it refuses to block.
950 * 3) A vm_map_swapin() call during a swapin will block, and
951 * proceed when the first swapin is done, turning into a nop.
952 * This is the reason the res_count is not incremented until
953 * after the swapin is complete.
954 * 4) There is a timing hole after the checks of the res_count, before
955 * the map lock is taken, during which a swapin may get the lock
956 * before a swapout about to happen. If this happens, the swapin
957 * will detect the state and increment the reference count, causing
958 * the swapout to be a nop, thereby delaying it until a later
959 * vm_map_deallocate. If the swapout gets the lock first, then
960 * the swapin will simply block until the swapout is done, and
961 * then proceed.
962 *
963 * Because vm_map_swapin() is potentially an expensive operation, it
964 * should be used with caution.
965 *
966 * Invariants:
967 * 1) A map with a residence count of zero is either swapped, or
968 * being swapped.
969 * 2) A map with a non-zero residence count is either resident,
970 * or being swapped in.
971 */
972
973 int vm_map_swap_enable = 1;
974
975 void vm_map_swapin (vm_map_t map)
976 {
977 register vm_map_entry_t entry;
978
979 if (!vm_map_swap_enable) /* debug */
980 return;
981
982 /*
983 * Map is locked
984 * First deal with various races.
985 */
986 if (map->sw_state == MAP_SW_IN)
987 /*
988 * we raced with swapout and won. Returning will incr.
989 * the res_count, turning the swapout into a nop.
990 */
991 return;
992
993 /*
994 * The residence count must be zero. If we raced with another
995 * swapin, the state would have been IN; if we raced with a
996 * swapout (after another competing swapin), we must have lost
997 * the race to get here (see above comment), in which case
998 * res_count is still 0.
999 */
1000 assert(map->res_count == 0);
1001
1002 /*
1003 * There are no intermediate states of a map going out or
1004 * coming in, since the map is locked during the transition.
1005 */
1006 assert(map->sw_state == MAP_SW_OUT);
1007
1008 /*
1009 * We now operate upon each map entry. If the entry is a sub-
1010 * or share-map, we call vm_map_res_reference upon it.
1011 * If the entry is an object, we call vm_object_res_reference
1012 * (this may iterate through the shadow chain).
1013 * Note that we hold the map locked the entire time,
1014 * even if we get back here via a recursive call in
1015 * vm_map_res_reference.
1016 */
1017 entry = vm_map_first_entry(map);
1018
1019 while (entry != vm_map_to_entry(map)) {
1020 if (entry->object.vm_object != VM_OBJECT_NULL) {
1021 if (entry->is_sub_map) {
1022 vm_map_t lmap = entry->object.sub_map;
1023 lck_mtx_lock(&lmap->s_lock);
1024 vm_map_res_reference(lmap);
1025 lck_mtx_unlock(&lmap->s_lock);
1026 } else {
1027 vm_object_t object = entry->object.vm_object;
1028 vm_object_lock(object);
1029 /*
1030 * This call may iterate through the
1031 * shadow chain.
1032 */
1033 vm_object_res_reference(object);
1034 vm_object_unlock(object);
1035 }
1036 }
1037 entry = entry->vme_next;
1038 }
1039 assert(map->sw_state == MAP_SW_OUT);
1040 map->sw_state = MAP_SW_IN;
1041 }
1042
1043 void vm_map_swapout(vm_map_t map)
1044 {
1045 register vm_map_entry_t entry;
1046
1047 /*
1048 * Map is locked
1049 * First deal with various races.
1050 * If we raced with a swapin and lost, the residence count
1051 * will have been incremented to 1, and we simply return.
1052 */
1053 lck_mtx_lock(&map->s_lock);
1054 if (map->res_count != 0) {
1055 lck_mtx_unlock(&map->s_lock);
1056 return;
1057 }
1058 lck_mtx_unlock(&map->s_lock);
1059
1060 /*
1061 * There are no intermediate states of a map going out or
1062 * coming in, since the map is locked during the transition.
1063 */
1064 assert(map->sw_state == MAP_SW_IN);
1065
1066 if (!vm_map_swap_enable)
1067 return;
1068
1069 /*
1070 * We now operate upon each map entry. If the entry is a sub-
1071 * or share-map, we call vm_map_res_deallocate upon it.
1072 * If the entry is an object, we call vm_object_res_deallocate
1073 * (this may iterate through the shadow chain).
1074 * Note that we hold the map locked the entire time,
1075 * even if we get back here via a recursive call in
1076 * vm_map_res_deallocate.
1077 */
1078 entry = vm_map_first_entry(map);
1079
1080 while (entry != vm_map_to_entry(map)) {
1081 if (entry->object.vm_object != VM_OBJECT_NULL) {
1082 if (entry->is_sub_map) {
1083 vm_map_t lmap = entry->object.sub_map;
1084 lck_mtx_lock(&lmap->s_lock);
1085 vm_map_res_deallocate(lmap);
1086 lck_mtx_unlock(&lmap->s_lock);
1087 } else {
1088 vm_object_t object = entry->object.vm_object;
1089 vm_object_lock(object);
1090 /*
1091 * This call may take a long time,
1092 * since it could actively push
1093 * out pages (if we implement it
1094 * that way).
1095 */
1096 vm_object_res_deallocate(object);
1097 vm_object_unlock(object);
1098 }
1099 }
1100 entry = entry->vme_next;
1101 }
1102 assert(map->sw_state == MAP_SW_IN);
1103 map->sw_state = MAP_SW_OUT;
1104 }
1105
1106 #endif /* TASK_SWAPPER */
1107
1108 /*
1109 * vm_map_lookup_entry: [ internal use only ]
1110 *
1111 * Calls into the vm map store layer to find the map
1112 * entry containing (or immediately preceding) the
1113 * specified address in the given map; the entry is returned
1114 * in the "entry" parameter. The boolean
1115 * result indicates whether the address is
1116 * actually contained in the map.
1117 */
1118 boolean_t
1119 vm_map_lookup_entry(
1120 register vm_map_t map,
1121 register vm_map_offset_t address,
1122 vm_map_entry_t *entry) /* OUT */
1123 {
1124 return ( vm_map_store_lookup_entry( map, address, entry ));
1125 }
1126
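/*
 * Illustrative sketch (compiled out): callers of vm_map_lookup_entry()
 * hold the map lock (read or write) across the lookup and any use of the
 * returned entry, and treat a FALSE result or a submap entry as "not
 * properly mapped", as vm_map_set_cache_attr() does above.
 */
#if 0 /* example only */
static kern_return_t
example_probe_address(vm_map_t map, vm_map_offset_t va)
{
	vm_map_entry_t entry;
	kern_return_t kr = KERN_SUCCESS;

	vm_map_lock_read(map);
	if (!vm_map_lookup_entry(map, va, &entry) || entry->is_sub_map)
		kr = KERN_INVALID_ADDRESS;
	vm_map_unlock_read(map);
	return kr;
}
#endif
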
1127 /*
1128 * Routine: vm_map_find_space
1129 * Purpose:
1130 * Allocate a range in the specified virtual address map,
1131 * returning the entry allocated for that range.
1132 * Used by kmem_alloc, etc.
1133 *
1134 * The map must NOT be locked. It will be returned locked
1135 * on KERN_SUCCESS, unlocked on failure.
1136 *
1137 * If an entry is allocated, the object/offset fields
1138 * are initialized to zero.
1139 */
1140 kern_return_t
1141 vm_map_find_space(
1142 register vm_map_t map,
1143 vm_map_offset_t *address, /* OUT */
1144 vm_map_size_t size,
1145 vm_map_offset_t mask,
1146 int flags,
1147 vm_map_entry_t *o_entry) /* OUT */
1148 {
1149 register vm_map_entry_t entry, new_entry;
1150 register vm_map_offset_t start;
1151 register vm_map_offset_t end;
1152
1153 if (size == 0) {
1154 *address = 0;
1155 return KERN_INVALID_ARGUMENT;
1156 }
1157
1158 if (flags & VM_FLAGS_GUARD_AFTER) {
1159 /* account for the back guard page in the size */
1160 size += PAGE_SIZE_64;
1161 }
1162
1163 new_entry = vm_map_entry_create(map);
1164
1165 /*
1166 * Look for the first possible address; if there's already
1167 * something at this address, we have to start after it.
1168 */
1169
1170 vm_map_lock(map);
1171
1172 if( map->disable_vmentry_reuse == TRUE) {
1173 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1174 } else {
1175 assert(first_free_is_valid(map));
1176 if ((entry = map->first_free) == vm_map_to_entry(map))
1177 start = map->min_offset;
1178 else
1179 start = entry->vme_end;
1180 }
1181
1182 /*
1183 * In any case, the "entry" always precedes
1184 * the proposed new region throughout the loop:
1185 */
1186
1187 while (TRUE) {
1188 register vm_map_entry_t next;
1189
1190 /*
1191 * Find the end of the proposed new region.
1192 * Be sure we didn't go beyond the end, or
1193 * wrap around the address.
1194 */
1195
1196 if (flags & VM_FLAGS_GUARD_BEFORE) {
1197 /* reserve space for the front guard page */
1198 start += PAGE_SIZE_64;
1199 }
1200 end = ((start + mask) & ~mask);
1201
1202 if (end < start) {
1203 vm_map_entry_dispose(map, new_entry);
1204 vm_map_unlock(map);
1205 return(KERN_NO_SPACE);
1206 }
1207 start = end;
1208 end += size;
1209
1210 if ((end > map->max_offset) || (end < start)) {
1211 vm_map_entry_dispose(map, new_entry);
1212 vm_map_unlock(map);
1213 return(KERN_NO_SPACE);
1214 }
1215
1216 /*
1217 * If there are no more entries, we must win.
1218 */
1219
1220 next = entry->vme_next;
1221 if (next == vm_map_to_entry(map))
1222 break;
1223
1224 /*
1225 * If there is another entry, it must be
1226 * after the end of the potential new region.
1227 */
1228
1229 if (next->vme_start >= end)
1230 break;
1231
1232 /*
1233 * Didn't fit -- move to the next entry.
1234 */
1235
1236 entry = next;
1237 start = entry->vme_end;
1238 }
1239
1240 /*
1241 * At this point,
1242 * "start" and "end" should define the endpoints of the
1243 * available new range, and
1244 * "entry" should refer to the region before the new
1245 * range, and
1246 *
1247 * the map should be locked.
1248 */
1249
1250 if (flags & VM_FLAGS_GUARD_BEFORE) {
1251 /* go back for the front guard page */
1252 start -= PAGE_SIZE_64;
1253 }
1254 *address = start;
1255
1256 new_entry->vme_start = start;
1257 new_entry->vme_end = end;
1258 assert(page_aligned(new_entry->vme_start));
1259 assert(page_aligned(new_entry->vme_end));
1260
1261 new_entry->is_shared = FALSE;
1262 new_entry->is_sub_map = FALSE;
1263 new_entry->use_pmap = FALSE;
1264 new_entry->object.vm_object = VM_OBJECT_NULL;
1265 new_entry->offset = (vm_object_offset_t) 0;
1266
1267 new_entry->needs_copy = FALSE;
1268
1269 new_entry->inheritance = VM_INHERIT_DEFAULT;
1270 new_entry->protection = VM_PROT_DEFAULT;
1271 new_entry->max_protection = VM_PROT_ALL;
1272 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1273 new_entry->wired_count = 0;
1274 new_entry->user_wired_count = 0;
1275
1276 new_entry->in_transition = FALSE;
1277 new_entry->needs_wakeup = FALSE;
1278 new_entry->no_cache = FALSE;
1279 new_entry->permanent = FALSE;
1280 new_entry->superpage_size = 0;
1281
1282 new_entry->alias = 0;
1283 new_entry->zero_wired_pages = FALSE;
1284
1285 VM_GET_FLAGS_ALIAS(flags, new_entry->alias);
1286
1287 /*
1288 * Insert the new entry into the list
1289 */
1290
1291 vm_map_store_entry_link(map, entry, new_entry);
1292
1293 map->size += size;
1294
1295 /*
1296 * Update the lookup hint
1297 */
1298 SAVE_HINT_MAP_WRITE(map, new_entry);
1299
1300 *o_entry = new_entry;
1301 return(KERN_SUCCESS);
1302 }
1303
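/*
 * Illustrative sketch (compiled out): a kmem_alloc-style use of
 * vm_map_find_space().  On KERN_SUCCESS the map comes back locked and the
 * new entry's object/offset are zeroed, so the caller installs its object
 * and then unlocks.  "example_object" stands for a hypothetical VM object
 * on which the caller already holds a reference.
 */
#if 0 /* example only */
static kern_return_t
example_alloc_space(
	vm_map_t	map,
	vm_map_size_t	size,
	vm_object_t	example_object,
	vm_map_offset_t	*addrp)
{
	vm_map_entry_t	entry;
	kern_return_t	kr;

	kr = vm_map_find_space(map, addrp, size, (vm_map_offset_t) 0, 0, &entry);
	if (kr != KERN_SUCCESS)
		return kr;
	entry->object.vm_object = example_object;
	entry->offset = (vm_object_offset_t) 0;
	vm_map_unlock(map);	/* vm_map_find_space() returned the map locked */
	return KERN_SUCCESS;
}
#endif
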
1304 int vm_map_pmap_enter_print = FALSE;
1305 int vm_map_pmap_enter_enable = FALSE;
1306
1307 /*
1308 * Routine: vm_map_pmap_enter [internal only]
1309 *
1310 * Description:
1311 * Force pages from the specified object to be entered into
1312 * the pmap at the specified address if they are present.
1313 * As soon as a page is not found in the object, the scan ends.
1314 *
1315 * Returns:
1316 * Nothing.
1317 *
1318 * In/out conditions:
1319 * The source map should not be locked on entry.
1320 */
1321 static void
1322 vm_map_pmap_enter(
1323 vm_map_t map,
1324 register vm_map_offset_t addr,
1325 register vm_map_offset_t end_addr,
1326 register vm_object_t object,
1327 vm_object_offset_t offset,
1328 vm_prot_t protection)
1329 {
1330 int type_of_fault;
1331 kern_return_t kr;
1332
1333 if(map->pmap == 0)
1334 return;
1335
1336 while (addr < end_addr) {
1337 register vm_page_t m;
1338
1339 vm_object_lock(object);
1340
1341 m = vm_page_lookup(object, offset);
1342 /*
1343 * ENCRYPTED SWAP:
1344 * The user should never see encrypted data, so do not
1345 * enter an encrypted page in the page table.
1346 */
1347 if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
1348 m->fictitious ||
1349 (m->unusual && ( m->error || m->restart || m->absent))) {
1350 vm_object_unlock(object);
1351 return;
1352 }
1353
1354 if (vm_map_pmap_enter_print) {
1355 printf("vm_map_pmap_enter:");
1356 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1357 map, (unsigned long long)addr, object, (unsigned long long)offset);
1358 }
1359 type_of_fault = DBG_CACHE_HIT_FAULT;
1360 kr = vm_fault_enter(m, map->pmap, addr, protection, protection,
1361 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
1362 &type_of_fault);
1363
1364 vm_object_unlock(object);
1365
1366 offset += PAGE_SIZE_64;
1367 addr += PAGE_SIZE;
1368 }
1369 }
1370
1371 boolean_t vm_map_pmap_is_empty(
1372 vm_map_t map,
1373 vm_map_offset_t start,
1374 vm_map_offset_t end);
1375 boolean_t vm_map_pmap_is_empty(
1376 vm_map_t map,
1377 vm_map_offset_t start,
1378 vm_map_offset_t end)
1379 {
1380 #ifdef MACHINE_PMAP_IS_EMPTY
1381 return pmap_is_empty(map->pmap, start, end);
1382 #else /* MACHINE_PMAP_IS_EMPTY */
1383 vm_map_offset_t offset;
1384 ppnum_t phys_page;
1385
1386 if (map->pmap == NULL) {
1387 return TRUE;
1388 }
1389
1390 for (offset = start;
1391 offset < end;
1392 offset += PAGE_SIZE) {
1393 phys_page = pmap_find_phys(map->pmap, offset);
1394 if (phys_page) {
1395 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1396 "page %d at 0x%llx\n",
1397 map, (long long)start, (long long)end,
1398 phys_page, (long long)offset);
1399 return FALSE;
1400 }
1401 }
1402 return TRUE;
1403 #endif /* MACHINE_PMAP_IS_EMPTY */
1404 }
1405
1406 /*
1407 * Routine: vm_map_enter
1408 *
1409 * Description:
1410 * Allocate a range in the specified virtual address map.
1411 * The resulting range will refer to memory defined by
1412 * the given memory object and offset into that object.
1413 *
1414 * Arguments are as defined in the vm_map call.
1415 */
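/*
 * Illustrative sketch (compiled out): the simplest use of vm_map_enter()
 * is an anonymous, zero-fill, "anywhere" allocation: a null object with
 * needs_copy FALSE, default protections and inheritance.  The size is
 * assumed page-aligned (vm_map_enter() asserts this below) and the chosen
 * address is returned through *addrp.
 */
#if 0 /* example only */
static kern_return_t
example_allocate_anywhere(vm_map_t map, vm_map_size_t size, vm_map_offset_t *addrp)
{
	*addrp = 0;
	return vm_map_enter(map, addrp, size, (vm_map_offset_t) 0,
			    VM_FLAGS_ANYWHERE,
			    VM_OBJECT_NULL, (vm_object_offset_t) 0, FALSE,
			    VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
}
#endif
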
1416 int _map_enter_debug = 0;
1417 static unsigned int vm_map_enter_restore_successes = 0;
1418 static unsigned int vm_map_enter_restore_failures = 0;
1419 kern_return_t
1420 vm_map_enter(
1421 vm_map_t map,
1422 vm_map_offset_t *address, /* IN/OUT */
1423 vm_map_size_t size,
1424 vm_map_offset_t mask,
1425 int flags,
1426 vm_object_t object,
1427 vm_object_offset_t offset,
1428 boolean_t needs_copy,
1429 vm_prot_t cur_protection,
1430 vm_prot_t max_protection,
1431 vm_inherit_t inheritance)
1432 {
1433 vm_map_entry_t entry, new_entry;
1434 vm_map_offset_t start, tmp_start, tmp_offset;
1435 vm_map_offset_t end, tmp_end;
1436 vm_map_offset_t tmp2_start, tmp2_end;
1437 vm_map_offset_t step;
1438 kern_return_t result = KERN_SUCCESS;
1439 vm_map_t zap_old_map = VM_MAP_NULL;
1440 vm_map_t zap_new_map = VM_MAP_NULL;
1441 boolean_t map_locked = FALSE;
1442 boolean_t pmap_empty = TRUE;
1443 boolean_t new_mapping_established = FALSE;
1444 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1445 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1446 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
1447 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
1448 boolean_t is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
1449 boolean_t permanent = ((flags & VM_FLAGS_PERMANENT) != 0);
1450 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
1451 char alias;
1452 vm_map_offset_t effective_min_offset, effective_max_offset;
1453 kern_return_t kr;
1454
1455 if (superpage_size) {
1456 switch (superpage_size) {
1457 /*
1458 * Note that the current implementation only supports
1459 * a single size for superpages, SUPERPAGE_SIZE, per
1460 * architecture. As soon as more sizes are to be
1461 * supported, SUPERPAGE_SIZE has to be replaced
1462 * with a lookup of the size depending on superpage_size.
1463 */
1464 #ifdef __x86_64__
1465 case SUPERPAGE_SIZE_ANY:
1466 /* handle it like 2 MB and round up to page size */
1467 size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
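	/* FALLTHROUGH: SUPERPAGE_SIZE_ANY is then handled as 2MB */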
1468 case SUPERPAGE_SIZE_2MB:
1469 break;
1470 #endif
1471 default:
1472 return KERN_INVALID_ARGUMENT;
1473 }
1474 mask = SUPERPAGE_SIZE-1;
1475 if (size & (SUPERPAGE_SIZE-1))
1476 return KERN_INVALID_ARGUMENT;
1477 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
1478 }
1479
1480
1481 #if CONFIG_EMBEDDED
1482 if (cur_protection & VM_PROT_WRITE){
1483 if ((cur_protection & VM_PROT_EXECUTE) && !(flags & VM_FLAGS_MAP_JIT)){
1484 printf("EMBEDDED: %s curprot cannot be write+execute. turning off execute\n", __PRETTY_FUNCTION__);
1485 cur_protection &= ~VM_PROT_EXECUTE;
1486 }
1487 }
1488 #endif /* CONFIG_EMBEDDED */
1489
1490 if (is_submap) {
1491 if (purgable) {
1492 /* submaps can not be purgeable */
1493 return KERN_INVALID_ARGUMENT;
1494 }
1495 if (object == VM_OBJECT_NULL) {
1496 /* submaps can not be created lazily */
1497 return KERN_INVALID_ARGUMENT;
1498 }
1499 }
1500 if (flags & VM_FLAGS_ALREADY) {
1501 /*
1502 * VM_FLAGS_ALREADY says that it's OK if the same mapping
1503 * is already present. For it to be meaningful, the requested
1504 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
1505 * we shouldn't try to remove what was mapped there first
1506 * (!VM_FLAGS_OVERWRITE).
1507 */
1508 if ((flags & VM_FLAGS_ANYWHERE) ||
1509 (flags & VM_FLAGS_OVERWRITE)) {
1510 return KERN_INVALID_ARGUMENT;
1511 }
1512 }
1513
1514 effective_min_offset = map->min_offset;
1515
1516 if (flags & VM_FLAGS_BEYOND_MAX) {
1517 /*
1518 * Allow an insertion beyond the map's max offset.
1519 */
1520 if (vm_map_is_64bit(map))
1521 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
1522 else
1523 effective_max_offset = 0x00000000FFFFF000ULL;
1524 } else {
1525 effective_max_offset = map->max_offset;
1526 }
1527
1528 if (size == 0 ||
1529 (offset & PAGE_MASK_64) != 0) {
1530 *address = 0;
1531 return KERN_INVALID_ARGUMENT;
1532 }
1533
1534 VM_GET_FLAGS_ALIAS(flags, alias);
1535
1536 #define RETURN(value) { result = value; goto BailOut; }
1537
1538 assert(page_aligned(*address));
1539 assert(page_aligned(size));
1540
1541 /*
1542 * Only zero-fill objects are allowed to be purgable.
1543 * LP64todo - limit purgable objects to 32-bits for now
1544 */
1545 if (purgable &&
1546 (offset != 0 ||
1547 (object != VM_OBJECT_NULL &&
1548 (object->vo_size != size ||
1549 object->purgable == VM_PURGABLE_DENY))
1550 || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
1551 return KERN_INVALID_ARGUMENT;
1552
1553 if (!anywhere && overwrite) {
1554 /*
1555 * Create a temporary VM map to hold the old mappings in the
1556 * affected area while we create the new one.
1557 * This avoids releasing the VM map lock in
1558 * vm_map_entry_delete() and allows atomicity
1559 * when we want to replace some mappings with a new one.
1560 * It also allows us to restore the old VM mappings if the
1561 * new mapping fails.
1562 */
1563 zap_old_map = vm_map_create(PMAP_NULL,
1564 *address,
1565 *address + size,
1566 map->hdr.entries_pageable);
1567 }
1568
1569 StartAgain: ;
1570
1571 start = *address;
1572
1573 if (anywhere) {
1574 vm_map_lock(map);
1575 map_locked = TRUE;
1576
1577 if ((flags & VM_FLAGS_MAP_JIT) && (map->jit_entry_exists)){
1578 result = KERN_INVALID_ARGUMENT;
1579 goto BailOut;
1580 }
1581
1582 /*
1583 * Calculate the first possible address.
1584 */
1585
1586 if (start < effective_min_offset)
1587 start = effective_min_offset;
1588 if (start > effective_max_offset)
1589 RETURN(KERN_NO_SPACE);
1590
1591 /*
1592 * Look for the first possible address;
1593 * if there's already something at this
1594 * address, we have to start after it.
1595 */
1596
1597 if( map->disable_vmentry_reuse == TRUE) {
1598 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1599 } else {
1600 assert(first_free_is_valid(map));
1601
1602 entry = map->first_free;
1603
1604 if (entry == vm_map_to_entry(map)) {
1605 entry = NULL;
1606 } else {
1607 if (entry->vme_next == vm_map_to_entry(map)){
1608 /*
1609 * Hole at the end of the map.
1610 */
1611 entry = NULL;
1612 } else {
1613 if (start < (entry->vme_next)->vme_start ) {
1614 start = entry->vme_end;
1615 } else {
1616 /*
1617 * Need to do a lookup.
1618 */
1619 entry = NULL;
1620 }
1621 }
1622 }
1623
1624 if (entry == NULL) {
1625 vm_map_entry_t tmp_entry;
1626 if (vm_map_lookup_entry(map, start, &tmp_entry))
1627 start = tmp_entry->vme_end;
1628 entry = tmp_entry;
1629 }
1630 }
1631
1632 /*
1633 * In any case, the "entry" always precedes
1634 * the proposed new region throughout the
1635 * loop:
1636 */
1637
1638 while (TRUE) {
1639 register vm_map_entry_t next;
1640
1641 /*
1642 * Find the end of the proposed new region.
1643 * Be sure we didn't go beyond the end, or
1644 * wrap around the address.
1645 */
1646
1647 end = ((start + mask) & ~mask);
1648 if (end < start)
1649 RETURN(KERN_NO_SPACE);
1650 start = end;
1651 end += size;
1652
1653 if ((end > effective_max_offset) || (end < start)) {
1654 if (map->wait_for_space) {
1655 if (size <= (effective_max_offset -
1656 effective_min_offset)) {
1657 assert_wait((event_t)map,
1658 THREAD_ABORTSAFE);
1659 vm_map_unlock(map);
1660 map_locked = FALSE;
1661 thread_block(THREAD_CONTINUE_NULL);
1662 goto StartAgain;
1663 }
1664 }
1665 RETURN(KERN_NO_SPACE);
1666 }
1667
1668 /*
1669 * If there are no more entries, we must win.
1670 */
1671
1672 next = entry->vme_next;
1673 if (next == vm_map_to_entry(map))
1674 break;
1675
1676 /*
1677 * If there is another entry, it must be
1678 * after the end of the potential new region.
1679 */
1680
1681 if (next->vme_start >= end)
1682 break;
1683
1684 /*
1685 * Didn't fit -- move to the next entry.
1686 */
1687
1688 entry = next;
1689 start = entry->vme_end;
1690 }
1691 *address = start;
1692 } else {
1693 /*
1694 * Verify that:
1695 * the address doesn't itself violate
1696 * the mask requirement.
1697 */
1698
1699 vm_map_lock(map);
1700 map_locked = TRUE;
1701 if ((start & mask) != 0)
1702 RETURN(KERN_NO_SPACE);
1703
1704 /*
1705 * ... the address is within bounds
1706 */
1707
1708 end = start + size;
1709
1710 if ((start < effective_min_offset) ||
1711 (end > effective_max_offset) ||
1712 (start >= end)) {
1713 RETURN(KERN_INVALID_ADDRESS);
1714 }
1715
1716 if (overwrite && zap_old_map != VM_MAP_NULL) {
1717 /*
1718 * Fixed mapping and "overwrite" flag: attempt to
1719 * remove all existing mappings in the specified
1720 * address range, saving them in our "zap_old_map".
1721 */
1722 (void) vm_map_delete(map, start, end,
1723 VM_MAP_REMOVE_SAVE_ENTRIES,
1724 zap_old_map);
1725 }
1726
1727 /*
1728 * ... the starting address isn't allocated
1729 */
1730
1731 if (vm_map_lookup_entry(map, start, &entry)) {
1732 if (! (flags & VM_FLAGS_ALREADY)) {
1733 RETURN(KERN_NO_SPACE);
1734 }
1735 /*
1736 * Check if what's already there is what we want.
1737 */
1738 tmp_start = start;
1739 tmp_offset = offset;
1740 if (entry->vme_start < start) {
1741 tmp_start -= start - entry->vme_start;
1742 tmp_offset -= start - entry->vme_start;
1743
1744 }
1745 for (; entry->vme_start < end;
1746 entry = entry->vme_next) {
1747 /*
1748 * Check if the mapping's attributes
1749 * match the existing map entry.
1750 */
1751 if (entry == vm_map_to_entry(map) ||
1752 entry->vme_start != tmp_start ||
1753 entry->is_sub_map != is_submap ||
1754 entry->offset != tmp_offset ||
1755 entry->needs_copy != needs_copy ||
1756 entry->protection != cur_protection ||
1757 entry->max_protection != max_protection ||
1758 entry->inheritance != inheritance ||
1759 entry->alias != alias) {
1760 /* not the same mapping ! */
1761 RETURN(KERN_NO_SPACE);
1762 }
1763 /*
1764 * Check if the same object is being mapped.
1765 */
1766 if (is_submap) {
1767 if (entry->object.sub_map !=
1768 (vm_map_t) object) {
1769 /* not the same submap */
1770 RETURN(KERN_NO_SPACE);
1771 }
1772 } else {
1773 if (entry->object.vm_object != object) {
1774 /* not the same VM object... */
1775 vm_object_t obj2;
1776
1777 obj2 = entry->object.vm_object;
1778 if ((obj2 == VM_OBJECT_NULL ||
1779 obj2->internal) &&
1780 (object == VM_OBJECT_NULL ||
1781 object->internal)) {
1782 /*
1783 * ... but both are
1784 * anonymous memory,
1785 * so equivalent.
1786 */
1787 } else {
1788 RETURN(KERN_NO_SPACE);
1789 }
1790 }
1791 }
1792
1793 tmp_offset += entry->vme_end - entry->vme_start;
1794 tmp_start += entry->vme_end - entry->vme_start;
1795 if (entry->vme_end >= end) {
1796 /* reached the end of our mapping */
1797 break;
1798 }
1799 }
1800 /* it all matches: let's use what's already there ! */
1801 RETURN(KERN_MEMORY_PRESENT);
1802 }
1803
1804 /*
1805 * ... the next region doesn't overlap the
1806 * end point.
1807 */
1808
1809 if ((entry->vme_next != vm_map_to_entry(map)) &&
1810 (entry->vme_next->vme_start < end))
1811 RETURN(KERN_NO_SPACE);
1812 }
1813
1814 /*
1815 * At this point,
1816 * "start" and "end" should define the endpoints of the
1817 * available new range, and
1818 * "entry" should refer to the region before the new
1819 * range, and
1820 *
1821 * the map should be locked.
1822 */
1823
1824 /*
1825 * See whether we can avoid creating a new entry (and object) by
1826 * extending one of our neighbors. [So far, we only attempt to
1827 * extend from below.] Note that we can never extend/join
1828 * purgable objects because they need to remain distinct
1829 * entities in order to implement their "volatile object"
1830 * semantics.
1831 */
1832
1833 if (purgable) {
1834 if (object == VM_OBJECT_NULL) {
1835 object = vm_object_allocate(size);
1836 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
1837 object->purgable = VM_PURGABLE_NONVOLATILE;
1838 offset = (vm_object_offset_t)0;
1839 }
1840 } else if ((is_submap == FALSE) &&
1841 (object == VM_OBJECT_NULL) &&
1842 (entry != vm_map_to_entry(map)) &&
1843 (entry->vme_end == start) &&
1844 (!entry->is_shared) &&
1845 (!entry->is_sub_map) &&
1846 ((alias == VM_MEMORY_REALLOC) || (entry->alias == alias)) &&
1847 (entry->inheritance == inheritance) &&
1848 (entry->protection == cur_protection) &&
1849 (entry->max_protection == max_protection) &&
1850 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
1851 (entry->in_transition == 0) &&
1852 (entry->no_cache == no_cache) &&
1853 ((entry->vme_end - entry->vme_start) + size <=
1854 (alias == VM_MEMORY_REALLOC ?
1855 ANON_CHUNK_SIZE :
1856 NO_COALESCE_LIMIT)) &&
1857 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
1858 if (vm_object_coalesce(entry->object.vm_object,
1859 VM_OBJECT_NULL,
1860 entry->offset,
1861 (vm_object_offset_t) 0,
1862 (vm_map_size_t)(entry->vme_end - entry->vme_start),
1863 (vm_map_size_t)(end - entry->vme_end))) {
1864
1865 /*
1866 * Coalesced the two objects - can extend
1867 * the previous map entry to include the
1868 * new range.
1869 */
1870 map->size += (end - entry->vme_end);
1871 entry->vme_end = end;
1872 vm_map_store_update_first_free(map, map->first_free);
1873 RETURN(KERN_SUCCESS);
1874 }
1875 }
1876
1877 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
1878 new_entry = NULL;
1879
1880 for (tmp2_start = start; tmp2_start<end; tmp2_start += step) {
1881 tmp2_end = tmp2_start + step;
1882 /*
1883 * Create a new entry
1884 * LP64todo - for now, we can only allocate 4GB internal objects
1885 * because the default pager can't page bigger ones. Remove this
1886 * when it can.
1887 *
1888 * XXX FBDP
1889 * The reserved "page zero" in each process's address space can
1890 * be arbitrarily large. Splitting it into separate 4GB objects and
1891 * therefore different VM map entries serves no purpose and just
1892 * slows down operations on the VM map, so let's not split the
1893 * allocation into 4GB chunks if the max protection is NONE. That
1894 * memory should never be accessible, so it will never get to the
1895 * default pager.
1896 */
1897 tmp_start = tmp2_start;
1898 if (object == VM_OBJECT_NULL &&
1899 size > (vm_map_size_t)ANON_CHUNK_SIZE &&
1900 max_protection != VM_PROT_NONE &&
1901 superpage_size == 0)
1902 tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE;
1903 else
1904 tmp_end = tmp2_end;
1905 do {
1906 new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
1907 object, offset, needs_copy,
1908 FALSE, FALSE,
1909 cur_protection, max_protection,
1910 VM_BEHAVIOR_DEFAULT,
1911 (flags & VM_FLAGS_MAP_JIT)? VM_INHERIT_NONE: inheritance,
1912 0, no_cache,
1913 permanent, superpage_size);
1914 new_entry->alias = alias;
1915 if (flags & VM_FLAGS_MAP_JIT){
1916 if (!(map->jit_entry_exists)){
1917 new_entry->used_for_jit = TRUE;
1918 map->jit_entry_exists = TRUE;
1919 }
1920 }
1921
1922 if (is_submap) {
1923 vm_map_t submap;
1924 boolean_t submap_is_64bit;
1925 boolean_t use_pmap;
1926
1927 new_entry->is_sub_map = TRUE;
1928 submap = (vm_map_t) object;
1929 submap_is_64bit = vm_map_is_64bit(submap);
1930 use_pmap = (alias == VM_MEMORY_SHARED_PMAP);
1931 #ifndef NO_NESTED_PMAP
1932 if (use_pmap && submap->pmap == NULL) {
1933 /* we need a sub pmap to nest... */
1934 submap->pmap = pmap_create(0, submap_is_64bit);
1935 if (submap->pmap == NULL) {
1936 /* let's proceed without nesting... */
1937 }
1938 }
1939 if (use_pmap && submap->pmap != NULL) {
1940 kr = pmap_nest(map->pmap,
1941 submap->pmap,
1942 tmp_start,
1943 tmp_start,
1944 tmp_end - tmp_start);
1945 if (kr != KERN_SUCCESS) {
1946 printf("vm_map_enter: "
1947 "pmap_nest(0x%llx,0x%llx) "
1948 "error 0x%x\n",
1949 (long long)tmp_start,
1950 (long long)tmp_end,
1951 kr);
1952 } else {
1953 /* we're now nested! */
1954 new_entry->use_pmap = TRUE;
1955 pmap_empty = FALSE;
1956 }
1957 }
1958 #endif /* NO_NESTED_PMAP */
1959 }
1960 entry = new_entry;
1961
1962 if (superpage_size) {
1963 vm_page_t pages, m;
1964 vm_object_t sp_object;
1965
1966 entry->offset = 0;
1967
1968 /* allocate one superpage */
1969 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
1970 if (kr != KERN_SUCCESS) {
1971 new_mapping_established = TRUE; /* will cause deallocation of whole range */
1972 RETURN(kr);
1973 }
1974
1975 /* create one vm_object per superpage */
1976 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
1977 sp_object->phys_contiguous = TRUE;
1978 sp_object->vo_shadow_offset = (vm_object_offset_t)pages->phys_page*PAGE_SIZE;
1979 entry->object.vm_object = sp_object;
1980
1981 /* enter the base pages into the object */
1982 vm_object_lock(sp_object);
1983 for (offset = 0; offset < SUPERPAGE_SIZE; offset += PAGE_SIZE) {
1984 m = pages;
1985 pmap_zero_page(m->phys_page);
1986 pages = NEXT_PAGE(m);
1987 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
1988 vm_page_insert(m, sp_object, offset);
1989 }
1990 vm_object_unlock(sp_object);
1991 }
1992 } while (tmp_end != tmp2_end &&
1993 (tmp_start = tmp_end) &&
1994 (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ?
1995 tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end));
1996 }
1997
1998 vm_map_unlock(map);
1999 map_locked = FALSE;
2000
2001 new_mapping_established = TRUE;
2002
2003 /* Wire down the new entry if the user
2004 * requested all new map entries be wired.
2005 */
2006 if ((map->wiring_required)||(superpage_size)) {
2007 pmap_empty = FALSE; /* pmap won't be empty */
2008 result = vm_map_wire(map, start, end,
2009 new_entry->protection, TRUE);
2010 RETURN(result);
2011 }
2012
2013 if ((object != VM_OBJECT_NULL) &&
2014 (vm_map_pmap_enter_enable) &&
2015 (!anywhere) &&
2016 (!needs_copy) &&
2017 (size < (128*1024))) {
2018 pmap_empty = FALSE; /* pmap won't be empty */
2019
2020 if (override_nx(map, alias) && cur_protection)
2021 cur_protection |= VM_PROT_EXECUTE;
2022
2023 vm_map_pmap_enter(map, start, end,
2024 object, offset, cur_protection);
2025 }
2026
2027 BailOut: ;
2028 if (result == KERN_SUCCESS) {
2029 vm_prot_t pager_prot;
2030 memory_object_t pager;
2031
2032 if (pmap_empty &&
2033 !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
2034 assert(vm_map_pmap_is_empty(map,
2035 *address,
2036 *address+size));
2037 }
2038
2039 /*
2040 * For "named" VM objects, let the pager know that the
2041 * memory object is being mapped. Some pagers need to keep
2042 * track of this, to know when they can reclaim the memory
2043 * object, for example.
2044 * VM calls memory_object_map() for each mapping (specifying
2045 * the protection of each mapping) and calls
2046 * memory_object_last_unmap() when all the mappings are gone.
2047 */
2048 pager_prot = max_protection;
2049 if (needs_copy) {
2050 /*
2051 * Copy-On-Write mapping: won't modify
2052 * the memory object.
2053 */
2054 pager_prot &= ~VM_PROT_WRITE;
2055 }
2056 if (!is_submap &&
2057 object != VM_OBJECT_NULL &&
2058 object->named &&
2059 object->pager != MEMORY_OBJECT_NULL) {
2060 vm_object_lock(object);
2061 pager = object->pager;
2062 if (object->named &&
2063 pager != MEMORY_OBJECT_NULL) {
2064 assert(object->pager_ready);
2065 vm_object_mapping_wait(object, THREAD_UNINT);
2066 vm_object_mapping_begin(object);
2067 vm_object_unlock(object);
2068
2069 kr = memory_object_map(pager, pager_prot);
2070 assert(kr == KERN_SUCCESS);
2071
2072 vm_object_lock(object);
2073 vm_object_mapping_end(object);
2074 }
2075 vm_object_unlock(object);
2076 }
2077 } else {
2078 if (new_mapping_established) {
2079 /*
2080 * We have to get rid of the new mappings since we
2081 * won't make them available to the user.
2082 * Try to do that atomically, to minimize the risk
2083 * that someone else creates new mappings in that range.
2084 */
2085 zap_new_map = vm_map_create(PMAP_NULL,
2086 *address,
2087 *address + size,
2088 map->hdr.entries_pageable);
2089 if (!map_locked) {
2090 vm_map_lock(map);
2091 map_locked = TRUE;
2092 }
2093 (void) vm_map_delete(map, *address, *address+size,
2094 VM_MAP_REMOVE_SAVE_ENTRIES,
2095 zap_new_map);
2096 }
2097 if (zap_old_map != VM_MAP_NULL &&
2098 zap_old_map->hdr.nentries != 0) {
2099 vm_map_entry_t entry1, entry2;
2100
2101 /*
2102 * The new mapping failed. Attempt to restore
2103 * the old mappings, saved in the "zap_old_map".
2104 */
2105 if (!map_locked) {
2106 vm_map_lock(map);
2107 map_locked = TRUE;
2108 }
2109
2110 /* first check if the coast is still clear */
2111 start = vm_map_first_entry(zap_old_map)->vme_start;
2112 end = vm_map_last_entry(zap_old_map)->vme_end;
2113 if (vm_map_lookup_entry(map, start, &entry1) ||
2114 vm_map_lookup_entry(map, end, &entry2) ||
2115 entry1 != entry2) {
2116 /*
2117 * Part of that range has already been
2118 * re-mapped: we can't restore the old
2119 * mappings...
2120 */
2121 vm_map_enter_restore_failures++;
2122 } else {
2123 /*
2124 * Transfer the saved map entries from
2125 * "zap_old_map" to the original "map",
2126 * inserting them all after "entry1".
2127 */
2128 for (entry2 = vm_map_first_entry(zap_old_map);
2129 entry2 != vm_map_to_entry(zap_old_map);
2130 entry2 = vm_map_first_entry(zap_old_map)) {
2131 vm_map_size_t entry_size;
2132
2133 entry_size = (entry2->vme_end -
2134 entry2->vme_start);
2135 vm_map_store_entry_unlink(zap_old_map,
2136 entry2);
2137 zap_old_map->size -= entry_size;
2138 vm_map_store_entry_link(map, entry1, entry2);
2139 map->size += entry_size;
2140 entry1 = entry2;
2141 }
2142 if (map->wiring_required) {
2143 /*
2144 * XXX TODO: we should rewire the
2145 * old pages here...
2146 */
2147 }
2148 vm_map_enter_restore_successes++;
2149 }
2150 }
2151 }
2152
2153 if (map_locked) {
2154 vm_map_unlock(map);
2155 }
2156
2157 /*
2158 * Get rid of the "zap_maps" and all the map entries that
2159 * they may still contain.
2160 */
2161 if (zap_old_map != VM_MAP_NULL) {
2162 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2163 zap_old_map = VM_MAP_NULL;
2164 }
2165 if (zap_new_map != VM_MAP_NULL) {
2166 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2167 zap_new_map = VM_MAP_NULL;
2168 }
2169
2170 return result;
2171
2172 #undef RETURN
2173 }
2174
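/*
 * Illustrative sketch: a minimal anonymous allocation through
 * vm_map_enter(), assuming a caller that already holds a reference on
 * "map".  With object == VM_OBJECT_NULL the routine above supplies the
 * backing object(s) itself, splitting allocations larger than
 * ANON_CHUNK_SIZE into multiple entries unless max_protection is
 * VM_PROT_NONE.
 */
#if 0	/* example only */
static kern_return_t
example_allocate_anonymous(
	vm_map_t	map,
	vm_map_size_t	size,
	vm_map_offset_t	*addr)
{
	*addr = 0;
	return vm_map_enter(map,
			    addr,
			    vm_map_round_page(size),
			    (vm_map_offset_t)0,		/* no alignment mask */
			    VM_FLAGS_ANYWHERE,		/* let the kernel pick the address */
			    VM_OBJECT_NULL,		/* anonymous: no backing object yet */
			    (vm_object_offset_t)0,
			    FALSE,			/* needs_copy */
			    VM_PROT_DEFAULT,
			    VM_PROT_ALL,
			    VM_INHERIT_DEFAULT);
}
#endif
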
2175 kern_return_t
2176 vm_map_enter_mem_object(
2177 vm_map_t target_map,
2178 vm_map_offset_t *address,
2179 vm_map_size_t initial_size,
2180 vm_map_offset_t mask,
2181 int flags,
2182 ipc_port_t port,
2183 vm_object_offset_t offset,
2184 boolean_t copy,
2185 vm_prot_t cur_protection,
2186 vm_prot_t max_protection,
2187 vm_inherit_t inheritance)
2188 {
2189 vm_map_address_t map_addr;
2190 vm_map_size_t map_size;
2191 vm_object_t object;
2192 vm_object_size_t size;
2193 kern_return_t result;
2194 boolean_t mask_cur_protection, mask_max_protection;
2195
2196 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
2197 mask_max_protection = max_protection & VM_PROT_IS_MASK;
2198 cur_protection &= ~VM_PROT_IS_MASK;
2199 max_protection &= ~VM_PROT_IS_MASK;
2200
2201 /*
2202 * Check arguments for validity
2203 */
2204 if ((target_map == VM_MAP_NULL) ||
2205 (cur_protection & ~VM_PROT_ALL) ||
2206 (max_protection & ~VM_PROT_ALL) ||
2207 (inheritance > VM_INHERIT_LAST_VALID) ||
2208 initial_size == 0)
2209 return KERN_INVALID_ARGUMENT;
2210
2211 map_addr = vm_map_trunc_page(*address);
2212 map_size = vm_map_round_page(initial_size);
2213 size = vm_object_round_page(initial_size);
2214
2215 /*
2216 * Find the vm object (if any) corresponding to this port.
2217 */
2218 if (!IP_VALID(port)) {
2219 object = VM_OBJECT_NULL;
2220 offset = 0;
2221 copy = FALSE;
2222 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
2223 vm_named_entry_t named_entry;
2224
2225 named_entry = (vm_named_entry_t) port->ip_kobject;
2226 /* a few checks to make sure user is obeying rules */
2227 if (size == 0) {
2228 if (offset >= named_entry->size)
2229 return KERN_INVALID_RIGHT;
2230 size = named_entry->size - offset;
2231 }
2232 if (mask_max_protection) {
2233 max_protection &= named_entry->protection;
2234 }
2235 if (mask_cur_protection) {
2236 cur_protection &= named_entry->protection;
2237 }
2238 if ((named_entry->protection & max_protection) !=
2239 max_protection)
2240 return KERN_INVALID_RIGHT;
2241 if ((named_entry->protection & cur_protection) !=
2242 cur_protection)
2243 return KERN_INVALID_RIGHT;
2244 if (named_entry->size < (offset + size))
2245 return KERN_INVALID_ARGUMENT;
2246
2247 /* the caller's "offset" parameter is relative to the start of the  */
2248 /* named entry, which in turn has its own offset into the VM object */
2249 offset = offset + named_entry->offset;
2250
2251 named_entry_lock(named_entry);
2252 if (named_entry->is_sub_map) {
2253 vm_map_t submap;
2254
2255 submap = named_entry->backing.map;
2256 vm_map_lock(submap);
2257 vm_map_reference(submap);
2258 vm_map_unlock(submap);
2259 named_entry_unlock(named_entry);
2260
2261 result = vm_map_enter(target_map,
2262 &map_addr,
2263 map_size,
2264 mask,
2265 flags | VM_FLAGS_SUBMAP,
2266 (vm_object_t) submap,
2267 offset,
2268 copy,
2269 cur_protection,
2270 max_protection,
2271 inheritance);
2272 if (result != KERN_SUCCESS) {
2273 vm_map_deallocate(submap);
2274 } else {
2275 /*
2276 * No need to lock "submap" just to check its
2277 * "mapped" flag: that flag is never reset
2278 * once it's been set and if we race, we'll
2279 * just end up setting it twice, which is OK.
2280 */
2281 if (submap->mapped == FALSE) {
2282 /*
2283 * This submap has never been mapped.
2284 * Set its "mapped" flag now that it
2285 * has been mapped.
2286 * This happens only for the first ever
2287 * mapping of a "submap".
2288 */
2289 vm_map_lock(submap);
2290 submap->mapped = TRUE;
2291 vm_map_unlock(submap);
2292 }
2293 *address = map_addr;
2294 }
2295 return result;
2296
2297 } else if (named_entry->is_pager) {
2298 unsigned int access;
2299 vm_prot_t protections;
2300 unsigned int wimg_mode;
2301
2302 protections = named_entry->protection & VM_PROT_ALL;
2303 access = GET_MAP_MEM(named_entry->protection);
2304
2305 object = vm_object_enter(named_entry->backing.pager,
2306 named_entry->size,
2307 named_entry->internal,
2308 FALSE,
2309 FALSE);
2310 if (object == VM_OBJECT_NULL) {
2311 named_entry_unlock(named_entry);
2312 return KERN_INVALID_OBJECT;
2313 }
2314
2315 /* JMM - drop reference on pager here */
2316
2317 /* create an extra ref for the named entry */
2318 vm_object_lock(object);
2319 vm_object_reference_locked(object);
2320 named_entry->backing.object = object;
2321 named_entry->is_pager = FALSE;
2322 named_entry_unlock(named_entry);
2323
2324 wimg_mode = object->wimg_bits;
2325
2326 if (access == MAP_MEM_IO) {
2327 wimg_mode = VM_WIMG_IO;
2328 } else if (access == MAP_MEM_COPYBACK) {
2329 wimg_mode = VM_WIMG_USE_DEFAULT;
2330 } else if (access == MAP_MEM_WTHRU) {
2331 wimg_mode = VM_WIMG_WTHRU;
2332 } else if (access == MAP_MEM_WCOMB) {
2333 wimg_mode = VM_WIMG_WCOMB;
2334 }
2335
2336 /* wait for object (if any) to be ready */
2337 if (!named_entry->internal) {
2338 while (!object->pager_ready) {
2339 vm_object_wait(
2340 object,
2341 VM_OBJECT_EVENT_PAGER_READY,
2342 THREAD_UNINT);
2343 vm_object_lock(object);
2344 }
2345 }
2346
2347 if (object->wimg_bits != wimg_mode)
2348 vm_object_change_wimg_mode(object, wimg_mode);
2349
2350 object->true_share = TRUE;
2351
2352 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
2353 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
2354 vm_object_unlock(object);
2355 } else {
2356 /* This is the case where we are going to map */
2357 /* an already mapped object. If the object is */
2358 /* not ready, it is internal. An external */
2359 /* object cannot be mapped until it is ready, */
2360 /* so we can skip the ready check */
2361 /* in this case. */
2362 object = named_entry->backing.object;
2363 assert(object != VM_OBJECT_NULL);
2364 named_entry_unlock(named_entry);
2365 vm_object_reference(object);
2366 }
2367 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
2368 /*
2369 * JMM - This is temporary until we unify named entries
2370 * and raw memory objects.
2371 *
2372 * Detected fake ip_kotype for a memory object. In
2373 * this case, the port isn't really a port at all, but
2374 * instead is just a raw memory object.
2375 */
2376
2377 object = vm_object_enter((memory_object_t)port,
2378 size, FALSE, FALSE, FALSE);
2379 if (object == VM_OBJECT_NULL)
2380 return KERN_INVALID_OBJECT;
2381
2382 /* wait for object (if any) to be ready */
2383 if (object != VM_OBJECT_NULL) {
2384 if (object == kernel_object) {
2385 printf("Warning: Attempt to map kernel object"
2386 " by a non-private kernel entity\n");
2387 return KERN_INVALID_OBJECT;
2388 }
2389 if (!object->pager_ready) {
2390 vm_object_lock(object);
2391
2392 while (!object->pager_ready) {
2393 vm_object_wait(object,
2394 VM_OBJECT_EVENT_PAGER_READY,
2395 THREAD_UNINT);
2396 vm_object_lock(object);
2397 }
2398 vm_object_unlock(object);
2399 }
2400 }
2401 } else {
2402 return KERN_INVALID_OBJECT;
2403 }
2404
2405 if (object != VM_OBJECT_NULL &&
2406 object->named &&
2407 object->pager != MEMORY_OBJECT_NULL &&
2408 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2409 memory_object_t pager;
2410 vm_prot_t pager_prot;
2411 kern_return_t kr;
2412
2413 /*
2414 * For "named" VM objects, let the pager know that the
2415 * memory object is being mapped. Some pagers need to keep
2416 * track of this, to know when they can reclaim the memory
2417 * object, for example.
2418 * VM calls memory_object_map() for each mapping (specifying
2419 * the protection of each mapping) and calls
2420 * memory_object_last_unmap() when all the mappings are gone.
2421 */
2422 pager_prot = max_protection;
2423 if (copy) {
2424 /*
2425 * Copy-On-Write mapping: won't modify the
2426 * memory object.
2427 */
2428 pager_prot &= ~VM_PROT_WRITE;
2429 }
2430 vm_object_lock(object);
2431 pager = object->pager;
2432 if (object->named &&
2433 pager != MEMORY_OBJECT_NULL &&
2434 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2435 assert(object->pager_ready);
2436 vm_object_mapping_wait(object, THREAD_UNINT);
2437 vm_object_mapping_begin(object);
2438 vm_object_unlock(object);
2439
2440 kr = memory_object_map(pager, pager_prot);
2441 assert(kr == KERN_SUCCESS);
2442
2443 vm_object_lock(object);
2444 vm_object_mapping_end(object);
2445 }
2446 vm_object_unlock(object);
2447 }
2448
2449 /*
2450 * Perform the copy if requested
2451 */
2452
2453 if (copy) {
2454 vm_object_t new_object;
2455 vm_object_offset_t new_offset;
2456
2457 result = vm_object_copy_strategically(object, offset, size,
2458 &new_object, &new_offset,
2459 &copy);
2460
2461
2462 if (result == KERN_MEMORY_RESTART_COPY) {
2463 boolean_t success;
2464 boolean_t src_needs_copy;
2465
2466 /*
2467 * XXX
2468 * We currently ignore src_needs_copy.
2469 * This really is the issue of how to make
2470 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
2471 * non-kernel users to use. Solution forthcoming.
2472 * In the meantime, since we don't allow non-kernel
2473 * memory managers to specify symmetric copy,
2474 * we won't run into problems here.
2475 */
2476 new_object = object;
2477 new_offset = offset;
2478 success = vm_object_copy_quickly(&new_object,
2479 new_offset, size,
2480 &src_needs_copy,
2481 &copy);
2482 assert(success);
2483 result = KERN_SUCCESS;
2484 }
2485 /*
2486 * Throw away the reference to the
2487 * original object, as it won't be mapped.
2488 */
2489
2490 vm_object_deallocate(object);
2491
2492 if (result != KERN_SUCCESS)
2493 return result;
2494
2495 object = new_object;
2496 offset = new_offset;
2497 }
2498
2499 result = vm_map_enter(target_map,
2500 &map_addr, map_size,
2501 (vm_map_offset_t)mask,
2502 flags,
2503 object, offset,
2504 copy,
2505 cur_protection, max_protection, inheritance);
2506 if (result != KERN_SUCCESS)
2507 vm_object_deallocate(object);
2508 *address = map_addr;
2509 return result;
2510 }
2511
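/*
 * Illustrative sketch: mapping a memory entry port with
 * vm_map_enter_mem_object(), roughly what the mach_vm_map() path does.
 * "mem_entry" is assumed to be a send right obtained earlier (for
 * example from mach_make_memory_entry_64()); the routine above resolves
 * it to a VM object or submap and then calls vm_map_enter().
 */
#if 0	/* example only */
static kern_return_t
example_map_memory_entry(
	vm_map_t	target_map,
	ipc_port_t	mem_entry,
	vm_map_size_t	size,
	vm_map_offset_t	*addr)
{
	*addr = 0;
	return vm_map_enter_mem_object(target_map,
				       addr,
				       size,
				       (vm_map_offset_t)0,
				       VM_FLAGS_ANYWHERE,
				       mem_entry,
				       (vm_object_offset_t)0,
				       TRUE,		/* copy: map it copy-on-write */
				       VM_PROT_READ | VM_PROT_WRITE,
				       VM_PROT_ALL,
				       VM_INHERIT_DEFAULT);
}
#endif
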
2512
2513
2514
2515 kern_return_t
2516 vm_map_enter_mem_object_control(
2517 vm_map_t target_map,
2518 vm_map_offset_t *address,
2519 vm_map_size_t initial_size,
2520 vm_map_offset_t mask,
2521 int flags,
2522 memory_object_control_t control,
2523 vm_object_offset_t offset,
2524 boolean_t copy,
2525 vm_prot_t cur_protection,
2526 vm_prot_t max_protection,
2527 vm_inherit_t inheritance)
2528 {
2529 vm_map_address_t map_addr;
2530 vm_map_size_t map_size;
2531 vm_object_t object;
2532 vm_object_size_t size;
2533 kern_return_t result;
2534 memory_object_t pager;
2535 vm_prot_t pager_prot;
2536 kern_return_t kr;
2537
2538 /*
2539 * Check arguments for validity
2540 */
2541 if ((target_map == VM_MAP_NULL) ||
2542 (cur_protection & ~VM_PROT_ALL) ||
2543 (max_protection & ~VM_PROT_ALL) ||
2544 (inheritance > VM_INHERIT_LAST_VALID) ||
2545 initial_size == 0)
2546 return KERN_INVALID_ARGUMENT;
2547
2548 map_addr = vm_map_trunc_page(*address);
2549 map_size = vm_map_round_page(initial_size);
2550 size = vm_object_round_page(initial_size);
2551
2552 object = memory_object_control_to_vm_object(control);
2553
2554 if (object == VM_OBJECT_NULL)
2555 return KERN_INVALID_OBJECT;
2556
2557 if (object == kernel_object) {
2558 printf("Warning: Attempt to map kernel object"
2559 " by a non-private kernel entity\n");
2560 return KERN_INVALID_OBJECT;
2561 }
2562
2563 vm_object_lock(object);
2564 object->ref_count++;
2565 vm_object_res_reference(object);
2566
2567 /*
2568 * For "named" VM objects, let the pager know that the
2569 * memory object is being mapped. Some pagers need to keep
2570 * track of this, to know when they can reclaim the memory
2571 * object, for example.
2572 * VM calls memory_object_map() for each mapping (specifying
2573 * the protection of each mapping) and calls
2574 * memory_object_last_unmap() when all the mappings are gone.
2575 */
2576 pager_prot = max_protection;
2577 if (copy) {
2578 pager_prot &= ~VM_PROT_WRITE;
2579 }
2580 pager = object->pager;
2581 if (object->named &&
2582 pager != MEMORY_OBJECT_NULL &&
2583 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2584 assert(object->pager_ready);
2585 vm_object_mapping_wait(object, THREAD_UNINT);
2586 vm_object_mapping_begin(object);
2587 vm_object_unlock(object);
2588
2589 kr = memory_object_map(pager, pager_prot);
2590 assert(kr == KERN_SUCCESS);
2591
2592 vm_object_lock(object);
2593 vm_object_mapping_end(object);
2594 }
2595 vm_object_unlock(object);
2596
2597 /*
2598 * Perform the copy if requested
2599 */
2600
2601 if (copy) {
2602 vm_object_t new_object;
2603 vm_object_offset_t new_offset;
2604
2605 result = vm_object_copy_strategically(object, offset, size,
2606 &new_object, &new_offset,
2607 &copy);
2608
2609
2610 if (result == KERN_MEMORY_RESTART_COPY) {
2611 boolean_t success;
2612 boolean_t src_needs_copy;
2613
2614 /*
2615 * XXX
2616 * We currently ignore src_needs_copy.
2617 * This really is the issue of how to make
2618 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
2619 * non-kernel users to use. Solution forthcoming.
2620 * In the meantime, since we don't allow non-kernel
2621 * memory managers to specify symmetric copy,
2622 * we won't run into problems here.
2623 */
2624 new_object = object;
2625 new_offset = offset;
2626 success = vm_object_copy_quickly(&new_object,
2627 new_offset, size,
2628 &src_needs_copy,
2629 &copy);
2630 assert(success);
2631 result = KERN_SUCCESS;
2632 }
2633 /*
2634 * Throw away the reference to the
2635 * original object, as it won't be mapped.
2636 */
2637
2638 vm_object_deallocate(object);
2639
2640 if (result != KERN_SUCCESS)
2641 return result;
2642
2643 object = new_object;
2644 offset = new_offset;
2645 }
2646
2647 result = vm_map_enter(target_map,
2648 &map_addr, map_size,
2649 (vm_map_offset_t)mask,
2650 flags,
2651 object, offset,
2652 copy,
2653 cur_protection, max_protection, inheritance);
2654 if (result != KERN_SUCCESS)
2655 vm_object_deallocate(object);
2656 *address = map_addr;
2657
2658 return result;
2659 }
2660
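/*
 * Illustrative sketch: mapping memory described by a
 * memory_object_control_t (for example, a control reference for a
 * file's pager obtained from the UBC layer).  The "control" argument
 * is assumed to have been obtained and validated by the caller; the
 * routine above takes its own object reference before mapping.
 */
#if 0	/* example only */
static kern_return_t
example_map_control(
	vm_map_t		target_map,
	memory_object_control_t	control,
	vm_object_offset_t	file_offset,
	vm_map_size_t		size,
	vm_map_offset_t		*addr)
{
	*addr = 0;
	return vm_map_enter_mem_object_control(target_map,
					       addr,
					       size,
					       (vm_map_offset_t)0,
					       VM_FLAGS_ANYWHERE,
					       control,
					       file_offset,
					       TRUE,	/* copy-on-write */
					       VM_PROT_READ,
					       VM_PROT_READ | VM_PROT_WRITE,
					       VM_INHERIT_COPY);
}
#endif
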
2661
2662 #if VM_CPM
2663
2664 #ifdef MACH_ASSERT
2665 extern pmap_paddr_t avail_start, avail_end;
2666 #endif
2667
2668 /*
2669 * Allocate memory in the specified map, with the caveat that
2670 * the memory is physically contiguous. This call may fail
2671 * if the system can't find sufficient contiguous memory.
2672 * This call may cause or lead to heart-stopping amounts of
2673 * paging activity.
2674 *
2675 * Memory obtained from this call should be freed in the
2676 * normal way, viz., via vm_deallocate.
2677 */
2678 kern_return_t
2679 vm_map_enter_cpm(
2680 vm_map_t map,
2681 vm_map_offset_t *addr,
2682 vm_map_size_t size,
2683 int flags)
2684 {
2685 vm_object_t cpm_obj;
2686 pmap_t pmap;
2687 vm_page_t m, pages;
2688 kern_return_t kr;
2689 vm_map_offset_t va, start, end, offset;
2690 #if MACH_ASSERT
2691 vm_map_offset_t prev_addr;
2692 #endif /* MACH_ASSERT */
2693
2694 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
2695
2696 if (!vm_allocate_cpm_enabled)
2697 return KERN_FAILURE;
2698
2699 if (size == 0) {
2700 *addr = 0;
2701 return KERN_SUCCESS;
2702 }
2703 if (anywhere)
2704 *addr = vm_map_min(map);
2705 else
2706 *addr = vm_map_trunc_page(*addr);
2707 size = vm_map_round_page(size);
2708
2709 /*
2710 * LP64todo - cpm_allocate should probably allow
2711 * allocations of >4GB, but not with the current
2712 * algorithm, so just cast down the size for now.
2713 */
2714 if (size > VM_MAX_ADDRESS)
2715 return KERN_RESOURCE_SHORTAGE;
2716 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
2717 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)
2718 return kr;
2719
2720 cpm_obj = vm_object_allocate((vm_object_size_t)size);
2721 assert(cpm_obj != VM_OBJECT_NULL);
2722 assert(cpm_obj->internal);
2723 assert(cpm_obj->size == (vm_object_size_t)size);
2724 assert(cpm_obj->can_persist == FALSE);
2725 assert(cpm_obj->pager_created == FALSE);
2726 assert(cpm_obj->pageout == FALSE);
2727 assert(cpm_obj->shadow == VM_OBJECT_NULL);
2728
2729 /*
2730 * Insert pages into object.
2731 */
2732
2733 vm_object_lock(cpm_obj);
2734 for (offset = 0; offset < size; offset += PAGE_SIZE) {
2735 m = pages;
2736 pages = NEXT_PAGE(m);
2737 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2738
2739 assert(!m->gobbled);
2740 assert(!m->wanted);
2741 assert(!m->pageout);
2742 assert(!m->tabled);
2743 assert(VM_PAGE_WIRED(m));
2744 /*
2745 * ENCRYPTED SWAP:
2746 * "m" is not supposed to be pageable, so it
2747 * should not be encrypted. It wouldn't be safe
2748 * to enter it in a new VM object while encrypted.
2749 */
2750 ASSERT_PAGE_DECRYPTED(m);
2751 assert(m->busy);
2752 assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT));
2753
2754 m->busy = FALSE;
2755 vm_page_insert(m, cpm_obj, offset);
2756 }
2757 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
2758 vm_object_unlock(cpm_obj);
2759
2760 /*
2761 * Hang onto a reference on the object in case a
2762 * multi-threaded application for some reason decides
2763 * to deallocate the portion of the address space into
2764 * which we will insert this object.
2765 *
2766 * Unfortunately, we must insert the object now before
2767 * we can talk to the pmap module about which addresses
2768 * must be wired down. Hence, the race with a multi-
2769 * threaded app.
2770 */
2771 vm_object_reference(cpm_obj);
2772
2773 /*
2774 * Insert object into map.
2775 */
2776
2777 kr = vm_map_enter(
2778 map,
2779 addr,
2780 size,
2781 (vm_map_offset_t)0,
2782 flags,
2783 cpm_obj,
2784 (vm_object_offset_t)0,
2785 FALSE,
2786 VM_PROT_ALL,
2787 VM_PROT_ALL,
2788 VM_INHERIT_DEFAULT);
2789
2790 if (kr != KERN_SUCCESS) {
2791 /*
2792 * A CPM object doesn't have can_persist set,
2793 * so all we have to do is deallocate it to
2794 * free up these pages.
2795 */
2796 assert(cpm_obj->pager_created == FALSE);
2797 assert(cpm_obj->can_persist == FALSE);
2798 assert(cpm_obj->pageout == FALSE);
2799 assert(cpm_obj->shadow == VM_OBJECT_NULL);
2800 vm_object_deallocate(cpm_obj); /* kill acquired ref */
2801 vm_object_deallocate(cpm_obj); /* kill creation ref */
2802 }
2803
2804 /*
2805 * Inform the physical mapping system that the
2806 * range of addresses may not fault, so that
2807 * page tables and such can be locked down as well.
2808 */
2809 start = *addr;
2810 end = start + size;
2811 pmap = vm_map_pmap(map);
2812 pmap_pageable(pmap, start, end, FALSE);
2813
2814 /*
2815 * Enter each page into the pmap, to avoid faults.
2816 * Note that this loop could be coded more efficiently,
2817 * if the need arose, rather than looking up each page
2818 * again.
2819 */
2820 for (offset = 0, va = start; offset < size;
2821 va += PAGE_SIZE, offset += PAGE_SIZE) {
2822 int type_of_fault;
2823
2824 vm_object_lock(cpm_obj);
2825 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2826 assert(m != VM_PAGE_NULL);
2827
2828 vm_page_zero_fill(m);
2829
2830 type_of_fault = DBG_ZERO_FILL_FAULT;
2831
2832 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
2833 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
2834 &type_of_fault);
2835
2836 vm_object_unlock(cpm_obj);
2837 }
2838
2839 #if MACH_ASSERT
2840 /*
2841 * Verify ordering in address space.
2842 */
2843 for (offset = 0; offset < size; offset += PAGE_SIZE) {
2844 vm_object_lock(cpm_obj);
2845 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2846 vm_object_unlock(cpm_obj);
2847 if (m == VM_PAGE_NULL)
2848 panic("vm_allocate_cpm: obj 0x%x off 0x%x no page",
2849 cpm_obj, offset);
2850 assert(m->tabled);
2851 assert(!m->busy);
2852 assert(!m->wanted);
2853 assert(!m->fictitious);
2854 assert(!m->private);
2855 assert(!m->absent);
2856 assert(!m->error);
2857 assert(!m->cleaning);
2858 assert(!m->precious);
2859 assert(!m->clustered);
2860 if (offset != 0) {
2861 if (m->phys_page != prev_addr + 1) {
2862 printf("start 0x%x end 0x%x va 0x%x\n",
2863 start, end, va);
2864 printf("obj 0x%x off 0x%x\n", cpm_obj, offset);
2865 printf("m 0x%x prev_address 0x%x\n", m,
2866 prev_addr);
2867 panic("vm_allocate_cpm: pages not contig!");
2868 }
2869 }
2870 prev_addr = m->phys_page;
2871 }
2872 #endif /* MACH_ASSERT */
2873
2874 vm_object_deallocate(cpm_obj); /* kill extra ref */
2875
2876 return kr;
2877 }
2878
2879
2880 #else /* VM_CPM */
2881
2882 /*
2883 * Interface is defined in all cases, but unless the kernel
2884 * is built explicitly for this option, the interface does
2885 * nothing.
2886 */
2887
2888 kern_return_t
2889 vm_map_enter_cpm(
2890 __unused vm_map_t map,
2891 __unused vm_map_offset_t *addr,
2892 __unused vm_map_size_t size,
2893 __unused int flags)
2894 {
2895 return KERN_FAILURE;
2896 }
2897 #endif /* VM_CPM */
2898
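/*
 * Illustrative sketch: requesting a physically contiguous allocation
 * through vm_map_enter_cpm().  On kernels built without VM_CPM the
 * stub above simply returns KERN_FAILURE, so callers are assumed to
 * handle that outcome; memory obtained this way is freed in the normal
 * way, via vm_deallocate().
 */
#if 0	/* example only */
static kern_return_t
example_contiguous_alloc(
	vm_map_t	map,
	vm_map_size_t	size,
	vm_map_offset_t	*addr)
{
	kern_return_t	kr;

	*addr = 0;
	kr = vm_map_enter_cpm(map, addr, vm_map_round_page(size),
			      VM_FLAGS_ANYWHERE);
	if (kr != KERN_SUCCESS)
		return kr;
	/* ... use the wired, contiguous memory ... */
	return KERN_SUCCESS;
}
#endif
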
2899 /* Not used without nested pmaps */
2900 #ifndef NO_NESTED_PMAP
2901 /*
2902 * Clip and unnest a portion of a nested submap mapping.
2903 */
2904
2905
2906 static void
2907 vm_map_clip_unnest(
2908 vm_map_t map,
2909 vm_map_entry_t entry,
2910 vm_map_offset_t start_unnest,
2911 vm_map_offset_t end_unnest)
2912 {
2913 vm_map_offset_t old_start_unnest = start_unnest;
2914 vm_map_offset_t old_end_unnest = end_unnest;
2915
2916 assert(entry->is_sub_map);
2917 assert(entry->object.sub_map != NULL);
2918
2919 /*
2920 * Query the platform for the optimal unnest range.
2921 * DRK: There's some duplication of effort here, since
2922 * callers may have adjusted the range to some extent. This
2923 * routine was introduced to support 1GiB subtree nesting
2924 * for x86 platforms, which can also nest on 2MiB boundaries
2925 * depending on size/alignment.
2926 */
2927 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
2928 log_unnest_badness(map, old_start_unnest, old_end_unnest);
2929 }
2930
2931 if (entry->vme_start > start_unnest ||
2932 entry->vme_end < end_unnest) {
2933 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
2934 "bad nested entry: start=0x%llx end=0x%llx\n",
2935 (long long)start_unnest, (long long)end_unnest,
2936 (long long)entry->vme_start, (long long)entry->vme_end);
2937 }
2938
2939 if (start_unnest > entry->vme_start) {
2940 _vm_map_clip_start(&map->hdr,
2941 entry,
2942 start_unnest);
2943 vm_map_store_update_first_free(map, map->first_free);
2944 }
2945 if (entry->vme_end > end_unnest) {
2946 _vm_map_clip_end(&map->hdr,
2947 entry,
2948 end_unnest);
2949 vm_map_store_update_first_free(map, map->first_free);
2950 }
2951
2952 pmap_unnest(map->pmap,
2953 entry->vme_start,
2954 entry->vme_end - entry->vme_start);
2955 if ((map->mapped) && (map->ref_count)) {
2956 /* clean up parent map/maps */
2957 vm_map_submap_pmap_clean(
2958 map, entry->vme_start,
2959 entry->vme_end,
2960 entry->object.sub_map,
2961 entry->offset);
2962 }
2963 entry->use_pmap = FALSE;
2964 }
2965 #endif /* NO_NESTED_PMAP */
2966
2967 /*
2968 * vm_map_clip_start: [ internal use only ]
2969 *
2970 * Asserts that the given entry begins at or after
2971 * the specified address; if necessary,
2972 * it splits the entry into two.
2973 */
2974 static void
2975 vm_map_clip_start(
2976 vm_map_t map,
2977 vm_map_entry_t entry,
2978 vm_map_offset_t startaddr)
2979 {
2980 #ifndef NO_NESTED_PMAP
2981 if (entry->use_pmap &&
2982 startaddr >= entry->vme_start) {
2983 vm_map_offset_t start_unnest, end_unnest;
2984
2985 /*
2986 * Make sure "startaddr" is no longer in a nested range
2987 * before we clip. Unnest only the minimum range the platform
2988 * can handle.
2989 * vm_map_clip_unnest may perform additional adjustments to
2990 * the unnest range.
2991 */
2992 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
2993 end_unnest = start_unnest + pmap_nesting_size_min;
2994 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
2995 }
2996 #endif /* NO_NESTED_PMAP */
2997 if (startaddr > entry->vme_start) {
2998 if (entry->object.vm_object &&
2999 !entry->is_sub_map &&
3000 entry->object.vm_object->phys_contiguous) {
3001 pmap_remove(map->pmap,
3002 (addr64_t)(entry->vme_start),
3003 (addr64_t)(entry->vme_end));
3004 }
3005 _vm_map_clip_start(&map->hdr, entry, startaddr);
3006 vm_map_store_update_first_free(map, map->first_free);
3007 }
3008 }
3009
3010
3011 #define vm_map_copy_clip_start(copy, entry, startaddr) \
3012 MACRO_BEGIN \
3013 if ((startaddr) > (entry)->vme_start) \
3014 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
3015 MACRO_END
3016
3017 /*
3018 * This routine is called only when it is known that
3019 * the entry must be split.
3020 */
3021 static void
3022 _vm_map_clip_start(
3023 register struct vm_map_header *map_header,
3024 register vm_map_entry_t entry,
3025 register vm_map_offset_t start)
3026 {
3027 register vm_map_entry_t new_entry;
3028
3029 /*
3030 * Split off the front portion --
3031 * note that we must insert the new
3032 * entry BEFORE this one, so that
3033 * this entry has the specified starting
3034 * address.
3035 */
3036
3037 new_entry = _vm_map_entry_create(map_header);
3038 vm_map_entry_copy_full(new_entry, entry);
3039
3040 new_entry->vme_end = start;
3041 entry->offset += (start - entry->vme_start);
3042 entry->vme_start = start;
3043
3044 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
3045
3046 if (entry->is_sub_map)
3047 vm_map_reference(new_entry->object.sub_map);
3048 else
3049 vm_object_reference(new_entry->object.vm_object);
3050 }
3051
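/*
 * Illustrative sketch of the arithmetic above, using a hypothetical
 * entry that covers [0x1000, 0x5000) at object offset 0 and is clipped
 * at 0x3000: the new (front) entry keeps the original start and offset,
 * while the original entry's start and offset both advance by 0x2000.
 */
#if 0	/* example only */
static void
example_clip_start_arithmetic(void)
{
	vm_map_offset_t		vme_start = 0x1000, vme_end = 0x5000;
	vm_object_offset_t	offset = 0;
	vm_map_offset_t		start = 0x3000;		/* clip address */

	/* front piece: [vme_start, start) at the original offset */
	vm_map_offset_t		front_end = start;

	/* original entry now covers [start, vme_end) */
	offset += (start - vme_start);			/* 0x2000 */
	vme_start = start;

	assert(front_end == 0x3000);
	assert(vme_start == 0x3000 && vme_end == 0x5000);
	assert(offset == 0x2000);
}
#endif
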
3052
3053 /*
3054 * vm_map_clip_end: [ internal use only ]
3055 *
3056 * Asserts that the given entry ends at or before
3057 * the specified address; if necessary,
3058 * it splits the entry into two.
3059 */
3060 static void
3061 vm_map_clip_end(
3062 vm_map_t map,
3063 vm_map_entry_t entry,
3064 vm_map_offset_t endaddr)
3065 {
3066 if (endaddr > entry->vme_end) {
3067 /*
3068 * Within the scope of this clipping, limit "endaddr" to
3069 * the end of this map entry...
3070 */
3071 endaddr = entry->vme_end;
3072 }
3073 #ifndef NO_NESTED_PMAP
3074 if (entry->use_pmap) {
3075 vm_map_offset_t start_unnest, end_unnest;
3076
3077 /*
3078 * Make sure the range between the start of this entry and
3079 * the new "endaddr" is no longer nested before we clip.
3080 * Unnest only the minimum range the platform can handle.
3081 * vm_map_clip_unnest may perform additional adjustments to
3082 * the unnest range.
3083 */
3084 start_unnest = entry->vme_start;
3085 end_unnest =
3086 (endaddr + pmap_nesting_size_min - 1) &
3087 ~(pmap_nesting_size_min - 1);
3088 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
3089 }
3090 #endif /* NO_NESTED_PMAP */
3091 if (endaddr < entry->vme_end) {
3092 if (entry->object.vm_object &&
3093 !entry->is_sub_map &&
3094 entry->object.vm_object->phys_contiguous) {
3095 pmap_remove(map->pmap,
3096 (addr64_t)(entry->vme_start),
3097 (addr64_t)(entry->vme_end));
3098 }
3099 _vm_map_clip_end(&map->hdr, entry, endaddr);
3100 vm_map_store_update_first_free(map, map->first_free);
3101 }
3102 }
3103
3104
3105 #define vm_map_copy_clip_end(copy, entry, endaddr) \
3106 MACRO_BEGIN \
3107 if ((endaddr) < (entry)->vme_end) \
3108 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
3109 MACRO_END
3110
3111 /*
3112 * This routine is called only when it is known that
3113 * the entry must be split.
3114 */
3115 static void
3116 _vm_map_clip_end(
3117 register struct vm_map_header *map_header,
3118 register vm_map_entry_t entry,
3119 register vm_map_offset_t end)
3120 {
3121 register vm_map_entry_t new_entry;
3122
3123 /*
3124 * Create a new entry and insert it
3125 * AFTER the specified entry
3126 */
3127
3128 new_entry = _vm_map_entry_create(map_header);
3129 vm_map_entry_copy_full(new_entry, entry);
3130
3131 new_entry->vme_start = entry->vme_end = end;
3132 new_entry->offset += (end - entry->vme_start);
3133
3134 _vm_map_store_entry_link(map_header, entry, new_entry);
3135
3136 if (entry->is_sub_map)
3137 vm_map_reference(new_entry->object.sub_map);
3138 else
3139 vm_object_reference(new_entry->object.vm_object);
3140 }
3141
3142
3143 /*
3144 * VM_MAP_RANGE_CHECK: [ internal use only ]
3145 *
3146 * Asserts that the starting and ending region
3147 * addresses fall within the valid range of the map.
3148 */
3149 #define VM_MAP_RANGE_CHECK(map, start, end) \
3150 MACRO_BEGIN \
3151 if (start < vm_map_min(map)) \
3152 start = vm_map_min(map); \
3153 if (end > vm_map_max(map)) \
3154 end = vm_map_max(map); \
3155 if (start > end) \
3156 start = end; \
3157 MACRO_END
3158
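/*
 * Illustrative sketch of the clamping above, assuming a hypothetical
 * map whose valid range is [0x1000, 0x9000): a request for
 * [0x500, 0xa000) is clamped to [0x1000, 0x9000), and an inverted
 * request (start > end) collapses to an empty range.
 */
#if 0	/* example only */
static void
example_range_check(vm_map_t map)
{
	vm_map_offset_t	start = 0x500;
	vm_map_offset_t	end   = 0xa000;

	VM_MAP_RANGE_CHECK(map, start, end);

	assert(start >= vm_map_min(map));
	assert(end <= vm_map_max(map));
	assert(start <= end);
}
#endif
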
3159 /*
3160 * vm_map_range_check: [ internal use only ]
3161 *
3162 * Check that the region defined by the specified start and
3163 * end addresses is wholly contained within a single map
3164 * entry or set of adjacent map entries of the specified map,
3165 * i.e. the specified region contains no unmapped space.
3166 * If any or all of the region is unmapped, FALSE is returned.
3167 * Otherwise, TRUE is returned and if the output argument 'entry'
3168 * is not NULL it points to the map entry containing the start
3169 * of the region.
3170 *
3171 * The map is locked for reading on entry and is left locked.
3172 */
3173 static boolean_t
3174 vm_map_range_check(
3175 register vm_map_t map,
3176 register vm_map_offset_t start,
3177 register vm_map_offset_t end,
3178 vm_map_entry_t *entry)
3179 {
3180 vm_map_entry_t cur;
3181 register vm_map_offset_t prev;
3182
3183 /*
3184 * Basic sanity checks first
3185 */
3186 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
3187 return (FALSE);
3188
3189 /*
3190 * Check first if the region starts within a valid
3191 * mapping for the map.
3192 */
3193 if (!vm_map_lookup_entry(map, start, &cur))
3194 return (FALSE);
3195
3196 /*
3197 * Optimize for the case that the region is contained
3198 * in a single map entry.
3199 */
3200 if (entry != (vm_map_entry_t *) NULL)
3201 *entry = cur;
3202 if (end <= cur->vme_end)
3203 return (TRUE);
3204
3205 /*
3206 * If the region is not wholly contained within a
3207 * single entry, walk the entries looking for holes.
3208 */
3209 prev = cur->vme_end;
3210 cur = cur->vme_next;
3211 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
3212 if (end <= cur->vme_end)
3213 return (TRUE);
3214 prev = cur->vme_end;
3215 cur = cur->vme_next;
3216 }
3217 return (FALSE);
3218 }
3219
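/*
 * Illustrative sketch of the calling convention described above: the
 * map is assumed to be locked (at least for reading) around the check,
 * and "entry" is only meaningful when TRUE is returned.
 */
#if 0	/* example only */
static boolean_t
example_range_is_mapped(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	vm_map_entry_t	entry;
	boolean_t	contiguous;

	vm_map_lock_read(map);
	contiguous = vm_map_range_check(map, start, end, &entry);
	vm_map_unlock_read(map);

	return contiguous;
}
#endif
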
3220 /*
3221 * vm_map_submap: [ kernel use only ]
3222 *
3223 * Mark the given range as handled by a subordinate map.
3224 *
3225 * This range must have been created with vm_map_find using
3226 * the vm_submap_object, and no other operations may have been
3227 * performed on this range prior to calling vm_map_submap.
3228 *
3229 * Only a limited number of operations can be performed
3230 * within this range after calling vm_map_submap:
3231 * vm_fault
3232 * [Don't try vm_map_copyin!]
3233 *
3234 * To remove a submapping, one must first remove the
3235 * range from the superior map, and then destroy the
3236 * submap (if desired). [Better yet, don't try it.]
3237 */
3238 kern_return_t
3239 vm_map_submap(
3240 vm_map_t map,
3241 vm_map_offset_t start,
3242 vm_map_offset_t end,
3243 vm_map_t submap,
3244 vm_map_offset_t offset,
3245 #ifdef NO_NESTED_PMAP
3246 __unused
3247 #endif /* NO_NESTED_PMAP */
3248 boolean_t use_pmap)
3249 {
3250 vm_map_entry_t entry;
3251 register kern_return_t result = KERN_INVALID_ARGUMENT;
3252 register vm_object_t object;
3253
3254 vm_map_lock(map);
3255
3256 if (! vm_map_lookup_entry(map, start, &entry)) {
3257 entry = entry->vme_next;
3258 }
3259
3260 if (entry == vm_map_to_entry(map) ||
3261 entry->is_sub_map) {
3262 vm_map_unlock(map);
3263 return KERN_INVALID_ARGUMENT;
3264 }
3265
3266 assert(!entry->use_pmap); /* we don't want to unnest anything here */
3267 vm_map_clip_start(map, entry, start);
3268 vm_map_clip_end(map, entry, end);
3269
3270 if ((entry->vme_start == start) && (entry->vme_end == end) &&
3271 (!entry->is_sub_map) &&
3272 ((object = entry->object.vm_object) == vm_submap_object) &&
3273 (object->resident_page_count == 0) &&
3274 (object->copy == VM_OBJECT_NULL) &&
3275 (object->shadow == VM_OBJECT_NULL) &&
3276 (!object->pager_created)) {
3277 entry->offset = (vm_object_offset_t)offset;
3278 entry->object.vm_object = VM_OBJECT_NULL;
3279 vm_object_deallocate(object);
3280 entry->is_sub_map = TRUE;
3281 entry->object.sub_map = submap;
3282 vm_map_reference(submap);
3283 submap->mapped = TRUE;
3284
3285 #ifndef NO_NESTED_PMAP
3286 if (use_pmap) {
3287 /* nest if platform code will allow */
3288 if(submap->pmap == NULL) {
3289 submap->pmap = pmap_create((vm_map_size_t) 0, FALSE);
3290 if(submap->pmap == PMAP_NULL) {
3291 vm_map_unlock(map);
3292 return(KERN_NO_SPACE);
3293 }
3294 }
3295 result = pmap_nest(map->pmap,
3296 (entry->object.sub_map)->pmap,
3297 (addr64_t)start,
3298 (addr64_t)start,
3299 (uint64_t)(end - start));
3300 if(result)
3301 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
3302 entry->use_pmap = TRUE;
3303 }
3304 #else /* NO_NESTED_PMAP */
3305 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
3306 #endif /* NO_NESTED_PMAP */
3307 result = KERN_SUCCESS;
3308 }
3309 vm_map_unlock(map);
3310
3311 return(result);
3312 }
3313
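/*
 * Illustrative sketch: installing "submap" over a range of "parent"
 * that is assumed to have already been reserved with the global
 * vm_submap_object, as the comment above requires.  Passing
 * use_pmap == TRUE asks the platform to nest the submap's pmap into
 * the parent's, when supported.
 */
#if 0	/* example only */
static kern_return_t
example_install_submap(
	vm_map_t	parent,
	vm_map_t	submap,
	vm_map_offset_t	start,
	vm_map_size_t	size)
{
	return vm_map_submap(parent,
			     start,
			     start + size,
			     submap,
			     (vm_map_offset_t)0,	/* offset into submap */
			     TRUE);			/* use_pmap: try to nest */
}
#endif
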
3314 /*
3315 * vm_map_protect:
3316 *
3317 * Sets the protection of the specified address
3318 * region in the target map. If "set_max" is
3319 * specified, the maximum protection is to be set;
3320 * otherwise, only the current protection is affected.
3321 */
3322 kern_return_t
3323 vm_map_protect(
3324 register vm_map_t map,
3325 register vm_map_offset_t start,
3326 register vm_map_offset_t end,
3327 register vm_prot_t new_prot,
3328 register boolean_t set_max)
3329 {
3330 register vm_map_entry_t current;
3331 register vm_map_offset_t prev;
3332 vm_map_entry_t entry;
3333 vm_prot_t new_max;
3334
3335 XPR(XPR_VM_MAP,
3336 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
3337 map, start, end, new_prot, set_max);
3338
3339 vm_map_lock(map);
3340
3341 /* LP64todo - remove this check when vm_map_commpage64()
3342 * no longer has to stuff in a map_entry for the commpage
3343 * above the map's max_offset.
3344 */
3345 if (start >= map->max_offset) {
3346 vm_map_unlock(map);
3347 return(KERN_INVALID_ADDRESS);
3348 }
3349
3350 while(1) {
3351 /*
3352 * Lookup the entry. If it doesn't start in a valid
3353 * entry, return an error.
3354 */
3355 if (! vm_map_lookup_entry(map, start, &entry)) {
3356 vm_map_unlock(map);
3357 return(KERN_INVALID_ADDRESS);
3358 }
3359
3360 if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
3361 start = SUPERPAGE_ROUND_DOWN(start);
3362 continue;
3363 }
3364 break;
3365 }
3366 if (entry->superpage_size)
3367 end = SUPERPAGE_ROUND_UP(end);
3368
3369 /*
3370 * Make a first pass to check for protection and address
3371 * violations.
3372 */
3373
3374 current = entry;
3375 prev = current->vme_start;
3376 while ((current != vm_map_to_entry(map)) &&
3377 (current->vme_start < end)) {
3378
3379 /*
3380 * If there is a hole, return an error.
3381 */
3382 if (current->vme_start != prev) {
3383 vm_map_unlock(map);
3384 return(KERN_INVALID_ADDRESS);
3385 }
3386
3387 new_max = current->max_protection;
3388 if(new_prot & VM_PROT_COPY) {
3389 new_max |= VM_PROT_WRITE;
3390 if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
3391 vm_map_unlock(map);
3392 return(KERN_PROTECTION_FAILURE);
3393 }
3394 } else {
3395 if ((new_prot & new_max) != new_prot) {
3396 vm_map_unlock(map);
3397 return(KERN_PROTECTION_FAILURE);
3398 }
3399 }
3400
3401 #if CONFIG_EMBEDDED
3402 if (new_prot & VM_PROT_WRITE) {
3403 if ((new_prot & VM_PROT_EXECUTE) && !(current->used_for_jit)) {
3404 printf("EMBEDDED: %s can't have both write and exec at the same time\n", __FUNCTION__);
3405 new_prot &= ~VM_PROT_EXECUTE;
3406 }
3407 }
3408 #endif
3409
3410 prev = current->vme_end;
3411 current = current->vme_next;
3412 }
3413 if (end > prev) {
3414 vm_map_unlock(map);
3415 return(KERN_INVALID_ADDRESS);
3416 }
3417
3418 /*
3419 * Go back and fix up protections.
3420 * Clip to start here if the range starts within
3421 * the entry.
3422 */
3423
3424 current = entry;
3425 if (current != vm_map_to_entry(map)) {
3426 /* clip and unnest if necessary */
3427 vm_map_clip_start(map, current, start);
3428 }
3429
3430 while ((current != vm_map_to_entry(map)) &&
3431 (current->vme_start < end)) {
3432
3433 vm_prot_t old_prot;
3434
3435 vm_map_clip_end(map, current, end);
3436
3437 assert(!current->use_pmap); /* clipping did unnest if needed */
3438
3439 old_prot = current->protection;
3440
3441 if(new_prot & VM_PROT_COPY) {
3442 /* caller is asking specifically to copy the */
3443 /* mapped data, this implies that max protection */
3444 /* will include write. Caller must be prepared */
3445 /* for loss of shared memory communication in the */
3446 /* target area after taking this step */
3447
3448 if (current->is_sub_map == FALSE && current->object.vm_object == VM_OBJECT_NULL){
3449 current->object.vm_object = vm_object_allocate((vm_map_size_t)(current->vme_end - current->vme_start));
3450 current->offset = 0;
3451 }
3452 current->needs_copy = TRUE;
3453 current->max_protection |= VM_PROT_WRITE;
3454 }
3455
3456 if (set_max)
3457 current->protection =
3458 (current->max_protection =
3459 new_prot & ~VM_PROT_COPY) &
3460 old_prot;
3461 else
3462 current->protection = new_prot & ~VM_PROT_COPY;
3463
3464 /*
3465 * Update physical map if necessary.
3466 * If the request is to turn off write protection,
3467 * we won't do it for real (in pmap). This is because
3468 * it would cause copy-on-write to fail. We've already
3469 * set the new protection in the map, so if a
3470 * write-protect fault occurs, it will be fixed up
3471 * properly, COW or not.
3472 */
3473 if (current->protection != old_prot) {
3474 /* Look one level in: we support nested pmaps */
3475 /* from mapped submaps which are direct entries */
3476 /* in our map */
3477
3478 vm_prot_t prot;
3479
3480 prot = current->protection & ~VM_PROT_WRITE;
3481
3482 if (override_nx(map, current->alias) && prot)
3483 prot |= VM_PROT_EXECUTE;
3484
3485 if (current->is_sub_map && current->use_pmap) {
3486 pmap_protect(current->object.sub_map->pmap,
3487 current->vme_start,
3488 current->vme_end,
3489 prot);
3490 } else {
3491 pmap_protect(map->pmap,
3492 current->vme_start,
3493 current->vme_end,
3494 prot);
3495 }
3496 }
3497 current = current->vme_next;
3498 }
3499
3500 current = entry;
3501 while ((current != vm_map_to_entry(map)) &&
3502 (current->vme_start <= end)) {
3503 vm_map_simplify_entry(map, current);
3504 current = current->vme_next;
3505 }
3506
3507 vm_map_unlock(map);
3508 return(KERN_SUCCESS);
3509 }
3510
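/*
 * Illustrative sketch: making a range read-only with vm_map_protect().
 * With set_max == FALSE only the current protection changes; passing
 * TRUE would also lower the maximum protection, which generally cannot
 * be raised again later.
 */
#if 0	/* example only */
static kern_return_t
example_make_readonly(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_size_t	size)
{
	return vm_map_protect(map,
			      vm_map_trunc_page(start),
			      vm_map_round_page(start + size),
			      VM_PROT_READ,
			      FALSE);	/* set_max */
}
#endif
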
3511 /*
3512 * vm_map_inherit:
3513 *
3514 * Sets the inheritance of the specified address
3515 * range in the target map. Inheritance
3516 * affects how the map will be shared with
3517 * child maps at the time of vm_map_fork.
3518 */
3519 kern_return_t
3520 vm_map_inherit(
3521 register vm_map_t map,
3522 register vm_map_offset_t start,
3523 register vm_map_offset_t end,
3524 register vm_inherit_t new_inheritance)
3525 {
3526 register vm_map_entry_t entry;
3527 vm_map_entry_t temp_entry;
3528
3529 vm_map_lock(map);
3530
3531 VM_MAP_RANGE_CHECK(map, start, end);
3532
3533 if (vm_map_lookup_entry(map, start, &temp_entry)) {
3534 entry = temp_entry;
3535 }
3536 else {
3537 temp_entry = temp_entry->vme_next;
3538 entry = temp_entry;
3539 }
3540
3541 /* first check entire range for submaps which can't support the */
3542 /* given inheritance. */
3543 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3544 if(entry->is_sub_map) {
3545 if(new_inheritance == VM_INHERIT_COPY) {
3546 vm_map_unlock(map);
3547 return(KERN_INVALID_ARGUMENT);
3548 }
3549 }
3550
3551 entry = entry->vme_next;
3552 }
3553
3554 entry = temp_entry;
3555 if (entry != vm_map_to_entry(map)) {
3556 /* clip and unnest if necessary */
3557 vm_map_clip_start(map, entry, start);
3558 }
3559
3560 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3561 vm_map_clip_end(map, entry, end);
3562 assert(!entry->use_pmap); /* clip did unnest if needed */
3563
3564 entry->inheritance = new_inheritance;
3565
3566 entry = entry->vme_next;
3567 }
3568
3569 vm_map_unlock(map);
3570 return(KERN_SUCCESS);
3571 }
3572
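/*
 * Illustrative sketch: marking a range to be shared with child maps at
 * fork time.  VM_INHERIT_COPY is rejected above for submap entries, so
 * a caller using that value is assumed to know the range contains none.
 */
#if 0	/* example only */
static kern_return_t
example_share_with_children(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_size_t	size)
{
	return vm_map_inherit(map,
			      vm_map_trunc_page(start),
			      vm_map_round_page(start + size),
			      VM_INHERIT_SHARE);
}
#endif
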
3573 /*
3574 * Update the accounting for the amount of wired memory in this map. If the user has
3575 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
3576 */
3577
3578 static kern_return_t
3579 add_wire_counts(
3580 vm_map_t map,
3581 vm_map_entry_t entry,
3582 boolean_t user_wire)
3583 {
3584 vm_map_size_t size;
3585
3586 if (user_wire) {
3587 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
3588
3589 /*
3590 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
3591 * this map entry.
3592 */
3593
3594 if (entry->user_wired_count == 0) {
3595 size = entry->vme_end - entry->vme_start;
3596
3597 /*
3598 * Since this is the first time the user is wiring this map entry, check to see if we're
3599 * exceeding the user wire limits. There is a per-map limit, which is the smaller of
3600 * the process's rlimit and the global vm_user_wire_limit that caps it. There is also
3601 * a system-wide limit on the amount of memory all users can wire. If the user is over either
3602 * limit, then we fail.
3603 */
3604
3605 if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
3606 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
3607 size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
3608 return KERN_RESOURCE_SHORTAGE;
3609
3610 /*
3611 * The first time the user wires an entry, we also increment the wired_count and add this to
3612 * the total that has been wired in the map.
3613 */
3614
3615 if (entry->wired_count >= MAX_WIRE_COUNT)
3616 return KERN_FAILURE;
3617
3618 entry->wired_count++;
3619 map->user_wire_size += size;
3620 }
3621
3622 if (entry->user_wired_count >= MAX_WIRE_COUNT)
3623 return KERN_FAILURE;
3624
3625 entry->user_wired_count++;
3626
3627 } else {
3628
3629 /*
3630 * The kernel's wiring the memory. Just bump the count and continue.
3631 */
3632
3633 if (entry->wired_count >= MAX_WIRE_COUNT)
3634 panic("vm_map_wire: too many wirings");
3635
3636 entry->wired_count++;
3637 }
3638
3639 return KERN_SUCCESS;
3640 }
3641
3642 /*
3643 * Update the memory wiring accounting now that the given map entry is being unwired.
3644 */
3645
3646 static void
3647 subtract_wire_counts(
3648 vm_map_t map,
3649 vm_map_entry_t entry,
3650 boolean_t user_wire)
3651 {
3652
3653 if (user_wire) {
3654
3655 /*
3656 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
3657 */
3658
3659 if (entry->user_wired_count == 1) {
3660
3661 /*
3662 * We're removing the last user wire reference. Decrement the wired_count and the total
3663 * user wired memory for this map.
3664 */
3665
3666 assert(entry->wired_count >= 1);
3667 entry->wired_count--;
3668 map->user_wire_size -= entry->vme_end - entry->vme_start;
3669 }
3670
3671 assert(entry->user_wired_count >= 1);
3672 entry->user_wired_count--;
3673
3674 } else {
3675
3676 /*
3677 * The kernel is unwiring the memory. Just update the count.
3678 */
3679
3680 assert(entry->wired_count >= 1);
3681 entry->wired_count--;
3682 }
3683 }
3684
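/*
 * Illustrative sketch: a user-level wire request balanced by an unwire,
 * the pattern whose per-entry accounting is maintained by
 * add_wire_counts() and subtract_wire_counts() above.  user_wire == TRUE
 * makes the operation interruptible and subject to the user wire limits.
 */
#if 0	/* example only */
static kern_return_t
example_wire_then_unwire(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_size_t	size)
{
	vm_map_offset_t	end = vm_map_round_page(start + size);
	kern_return_t	kr;

	start = vm_map_trunc_page(start);
	kr = vm_map_wire(map, start, end,
			 VM_PROT_READ | VM_PROT_WRITE,	/* access_type */
			 TRUE);				/* user_wire */
	if (kr != KERN_SUCCESS)
		return kr;
	/* ... access the wired memory ... */
	return vm_map_unwire(map, start, end, TRUE);
}
#endif
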
3685 /*
3686 * vm_map_wire:
3687 *
3688 * Sets the pageability of the specified address range in the
3689 * target map as wired. Regions specified as not pageable require
3690 * locked-down physical memory and physical page maps. The
3691 * access_type variable indicates types of accesses that must not
3692 * generate page faults. This is checked against protection of
3693 * memory being locked-down.
3694 *
3695 * The map must not be locked, but a reference must remain to the
3696 * map throughout the call.
3697 */
3698 static kern_return_t
3699 vm_map_wire_nested(
3700 register vm_map_t map,
3701 register vm_map_offset_t start,
3702 register vm_map_offset_t end,
3703 register vm_prot_t access_type,
3704 boolean_t user_wire,
3705 pmap_t map_pmap,
3706 vm_map_offset_t pmap_addr)
3707 {
3708 register vm_map_entry_t entry;
3709 struct vm_map_entry *first_entry, tmp_entry;
3710 vm_map_t real_map;
3711 register vm_map_offset_t s,e;
3712 kern_return_t rc;
3713 boolean_t need_wakeup;
3714 boolean_t main_map = FALSE;
3715 wait_interrupt_t interruptible_state;
3716 thread_t cur_thread;
3717 unsigned int last_timestamp;
3718 vm_map_size_t size;
3719
3720 vm_map_lock(map);
3721 if(map_pmap == NULL)
3722 main_map = TRUE;
3723 last_timestamp = map->timestamp;
3724
3725 VM_MAP_RANGE_CHECK(map, start, end);
3726 assert(page_aligned(start));
3727 assert(page_aligned(end));
3728 if (start == end) {
3729 /* We wired what the caller asked for, zero pages */
3730 vm_map_unlock(map);
3731 return KERN_SUCCESS;
3732 }
3733
3734 need_wakeup = FALSE;
3735 cur_thread = current_thread();
3736
3737 s = start;
3738 rc = KERN_SUCCESS;
3739
3740 if (vm_map_lookup_entry(map, s, &first_entry)) {
3741 entry = first_entry;
3742 /*
3743 * vm_map_clip_start will be done later.
3744 * We don't want to unnest any nested submaps here !
3745 */
3746 } else {
3747 /* Start address is not in map */
3748 rc = KERN_INVALID_ADDRESS;
3749 goto done;
3750 }
3751
3752 while ((entry != vm_map_to_entry(map)) && (s < end)) {
3753 /*
3754 * At this point, we have wired from "start" to "s".
3755 * We still need to wire from "s" to "end".
3756 *
3757 * "entry" hasn't been clipped, so it could start before "s"
3758 * and/or end after "end".
3759 */
3760
3761 /* "e" is how far we want to wire in this entry */
3762 e = entry->vme_end;
3763 if (e > end)
3764 e = end;
3765
3766 /*
3767 * If another thread is wiring/unwiring this entry then
3768 * block after asking the other thread to wake us up.
3769 */
3770 if (entry->in_transition) {
3771 wait_result_t wait_result;
3772
3773 /*
3774 * We have not clipped the entry. Make sure that
3775 * the start address is in range so that the lookup
3776 * below will succeed.
3777 * "s" is the current starting point: we've already
3778 * wired from "start" to "s" and we still have
3779 * to wire from "s" to "end".
3780 */
3781
3782 entry->needs_wakeup = TRUE;
3783
3784 /*
3785 * wake up anybody waiting on entries that we have
3786 * already wired.
3787 */
3788 if (need_wakeup) {
3789 vm_map_entry_wakeup(map);
3790 need_wakeup = FALSE;
3791 }
3792 /*
3793 * User wiring is interruptible
3794 */
3795 wait_result = vm_map_entry_wait(map,
3796 (user_wire) ? THREAD_ABORTSAFE :
3797 THREAD_UNINT);
3798 if (user_wire && wait_result == THREAD_INTERRUPTED) {
3799 /*
3800 * undo the wirings we have done so far
3801 * We do not clear the needs_wakeup flag,
3802 * because we cannot tell if we were the
3803 * only one waiting.
3804 */
3805 rc = KERN_FAILURE;
3806 goto done;
3807 }
3808
3809 /*
3810 * Cannot avoid a lookup here. reset timestamp.
3811 */
3812 last_timestamp = map->timestamp;
3813
3814 /*
3815 * The entry could have been clipped, look it up again.
3816 * The worst that can happen is that it may not exist anymore.
3817 */
3818 if (!vm_map_lookup_entry(map, s, &first_entry)) {
3819 /*
3820 * User: undo everything up to the previous
3821 * entry. Let vm_map_unwire worry about
3822 * checking the validity of the range.
3823 */
3824 rc = KERN_FAILURE;
3825 goto done;
3826 }
3827 entry = first_entry;
3828 continue;
3829 }
3830
3831 if (entry->is_sub_map) {
3832 vm_map_offset_t sub_start;
3833 vm_map_offset_t sub_end;
3834 vm_map_offset_t local_start;
3835 vm_map_offset_t local_end;
3836 pmap_t pmap;
3837
3838 vm_map_clip_start(map, entry, s);
3839 vm_map_clip_end(map, entry, end);
3840
3841 sub_start = entry->offset;
3842 sub_end = entry->vme_end;
3843 sub_end += entry->offset - entry->vme_start;
3844
3845 local_end = entry->vme_end;
3846 if(map_pmap == NULL) {
3847 vm_object_t object;
3848 vm_object_offset_t offset;
3849 vm_prot_t prot;
3850 boolean_t wired;
3851 vm_map_entry_t local_entry;
3852 vm_map_version_t version;
3853 vm_map_t lookup_map;
3854
3855 if(entry->use_pmap) {
3856 pmap = entry->object.sub_map->pmap;
3857 /* the ppc implementation requires that */
3858 /* a submap's pmap address ranges line */
3859 /* up with the parent map's */
3860 #ifdef notdef
3861 pmap_addr = sub_start;
3862 #endif
3863 pmap_addr = s;
3864 } else {
3865 pmap = map->pmap;
3866 pmap_addr = s;
3867 }
3868
3869 if (entry->wired_count) {
3870 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
3871 goto done;
3872
3873 /*
3874 * The map was not unlocked:
3875 * no need to goto re-lookup.
3876 * Just go directly to next entry.
3877 */
3878 entry = entry->vme_next;
3879 s = entry->vme_start;
3880 continue;
3881
3882 }
3883
3884 /* call vm_map_lookup_locked to */
3885 /* cause any needs_copy to be */
3886 /* evaluated */
3887 local_start = entry->vme_start;
3888 lookup_map = map;
3889 vm_map_lock_write_to_read(map);
3890 if(vm_map_lookup_locked(
3891 &lookup_map, local_start,
3892 access_type,
3893 OBJECT_LOCK_EXCLUSIVE,
3894 &version, &object,
3895 &offset, &prot, &wired,
3896 NULL,
3897 &real_map)) {
3898
3899 vm_map_unlock_read(lookup_map);
3900 vm_map_unwire(map, start,
3901 s, user_wire);
3902 return(KERN_FAILURE);
3903 }
3904 if(real_map != lookup_map)
3905 vm_map_unlock(real_map);
3906 vm_map_unlock_read(lookup_map);
3907 vm_map_lock(map);
3908 vm_object_unlock(object);
3909
3910 /* we unlocked, so must re-lookup */
3911 if (!vm_map_lookup_entry(map,
3912 local_start,
3913 &local_entry)) {
3914 rc = KERN_FAILURE;
3915 goto done;
3916 }
3917
3918 /*
3919 * entry could have been "simplified",
3920 * so re-clip
3921 */
3922 entry = local_entry;
3923 assert(s == local_start);
3924 vm_map_clip_start(map, entry, s);
3925 vm_map_clip_end(map, entry, end);
3926 /* re-compute "e" */
3927 e = entry->vme_end;
3928 if (e > end)
3929 e = end;
3930
3931 /* did we have a change of type? */
3932 if (!entry->is_sub_map) {
3933 last_timestamp = map->timestamp;
3934 continue;
3935 }
3936 } else {
3937 local_start = entry->vme_start;
3938 pmap = map_pmap;
3939 }
3940
3941 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
3942 goto done;
3943
3944 entry->in_transition = TRUE;
3945
3946 vm_map_unlock(map);
3947 rc = vm_map_wire_nested(entry->object.sub_map,
3948 sub_start, sub_end,
3949 access_type,
3950 user_wire, pmap, pmap_addr);
3951 vm_map_lock(map);
3952
3953 /*
3954 * Find the entry again. It could have been clipped
3955 * after we unlocked the map.
3956 */
3957 if (!vm_map_lookup_entry(map, local_start,
3958 &first_entry))
3959 panic("vm_map_wire: re-lookup failed");
3960 entry = first_entry;
3961
3962 assert(local_start == s);
3963 /* re-compute "e" */
3964 e = entry->vme_end;
3965 if (e > end)
3966 e = end;
3967
3968 last_timestamp = map->timestamp;
3969 while ((entry != vm_map_to_entry(map)) &&
3970 (entry->vme_start < e)) {
3971 assert(entry->in_transition);
3972 entry->in_transition = FALSE;
3973 if (entry->needs_wakeup) {
3974 entry->needs_wakeup = FALSE;
3975 need_wakeup = TRUE;
3976 }
3977 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
3978 subtract_wire_counts(map, entry, user_wire);
3979 }
3980 entry = entry->vme_next;
3981 }
3982 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
3983 goto done;
3984 }
3985
3986 /* no need to relookup again */
3987 s = entry->vme_start;
3988 continue;
3989 }
3990
3991 /*
3992 * If this entry is already wired then increment
3993 * the appropriate wire reference count.
3994 */
3995 if (entry->wired_count) {
3996 /*
3997 * entry is already wired down, get our reference
3998 * after clipping to our range.
3999 */
4000 vm_map_clip_start(map, entry, s);
4001 vm_map_clip_end(map, entry, end);
4002
4003 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4004 goto done;
4005
4006 /* map was not unlocked: no need to relookup */
4007 entry = entry->vme_next;
4008 s = entry->vme_start;
4009 continue;
4010 }
4011
4012 /*
4013 * Unwired entry or wire request transmitted via submap
4014 */
4015
4016
4017 /*
4018 * Perform actions of vm_map_lookup that need the write
4019 * lock on the map: create a shadow object for a
4020 * copy-on-write region, or an object for a zero-fill
4021 * region.
4022 */
4023 size = entry->vme_end - entry->vme_start;
4024 /*
4025 * If wiring a copy-on-write page, we need to copy it now
4026 * even if we're only (currently) requesting read access.
4027 * This is aggressive, but once it's wired we can't move it.
4028 */
4029 if (entry->needs_copy) {
4030 vm_object_shadow(&entry->object.vm_object,
4031 &entry->offset, size);
4032 entry->needs_copy = FALSE;
4033 } else if (entry->object.vm_object == VM_OBJECT_NULL) {
4034 entry->object.vm_object = vm_object_allocate(size);
4035 entry->offset = (vm_object_offset_t)0;
4036 }
4037
4038 vm_map_clip_start(map, entry, s);
4039 vm_map_clip_end(map, entry, end);
4040
4041 /* re-compute "e" */
4042 e = entry->vme_end;
4043 if (e > end)
4044 e = end;
4045
4046 /*
4047 * Check for holes and protection mismatch.
4048 * Holes: Next entry should be contiguous unless this
4049 * is the end of the region.
4050 * Protection: Access requested must be allowed, unless
4051 * wiring is by protection class
4052 */
4053 if ((entry->vme_end < end) &&
4054 ((entry->vme_next == vm_map_to_entry(map)) ||
4055 (entry->vme_next->vme_start > entry->vme_end))) {
4056 /* found a hole */
4057 rc = KERN_INVALID_ADDRESS;
4058 goto done;
4059 }
4060 if ((entry->protection & access_type) != access_type) {
4061 /* found a protection problem */
4062 rc = KERN_PROTECTION_FAILURE;
4063 goto done;
4064 }
4065
4066 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
4067
4068 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4069 goto done;
4070
4071 entry->in_transition = TRUE;
4072
4073 /*
4074 * This entry might get split once we unlock the map.
4075 * In vm_fault_wire(), we need the current range as
4076 * defined by this entry. In order for this to work
4077 * along with a simultaneous clip operation, we make a
4078 * temporary copy of this entry and use that for the
4079 * wiring. Note that the underlying objects do not
4080 * change during a clip.
4081 */
4082 tmp_entry = *entry;
4083
4084 /*
4085 * The in_transition state guarantees that the entry
4086 * (or entries for this range, if a split occurred) will be
4087 * there when the map lock is acquired for the second time.
4088 */
4089 vm_map_unlock(map);
4090
4091 if (!user_wire && cur_thread != THREAD_NULL)
4092 interruptible_state = thread_interrupt_level(THREAD_UNINT);
4093 else
4094 interruptible_state = THREAD_UNINT;
4095
4096 if(map_pmap)
4097 rc = vm_fault_wire(map,
4098 &tmp_entry, map_pmap, pmap_addr);
4099 else
4100 rc = vm_fault_wire(map,
4101 &tmp_entry, map->pmap,
4102 tmp_entry.vme_start);
4103
4104 if (!user_wire && cur_thread != THREAD_NULL)
4105 thread_interrupt_level(interruptible_state);
4106
4107 vm_map_lock(map);
4108
4109 if (last_timestamp+1 != map->timestamp) {
4110 /*
4111 * Find the entry again. It could have been clipped
4112 * after we unlocked the map.
4113 */
4114 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4115 &first_entry))
4116 panic("vm_map_wire: re-lookup failed");
4117
4118 entry = first_entry;
4119 }
4120
4121 last_timestamp = map->timestamp;
4122
4123 while ((entry != vm_map_to_entry(map)) &&
4124 (entry->vme_start < tmp_entry.vme_end)) {
4125 assert(entry->in_transition);
4126 entry->in_transition = FALSE;
4127 if (entry->needs_wakeup) {
4128 entry->needs_wakeup = FALSE;
4129 need_wakeup = TRUE;
4130 }
4131 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
4132 subtract_wire_counts(map, entry, user_wire);
4133 }
4134 entry = entry->vme_next;
4135 }
4136
4137 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
4138 goto done;
4139 }
4140
4141 s = entry->vme_start;
4142 } /* end while loop through map entries */
4143
4144 done:
4145 if (rc == KERN_SUCCESS) {
4146 /* repair any damage we may have made to the VM map */
4147 vm_map_simplify_range(map, start, end);
4148 }
4149
4150 vm_map_unlock(map);
4151
4152 /*
4153 * wake up anybody waiting on entries we wired.
4154 */
4155 if (need_wakeup)
4156 vm_map_entry_wakeup(map);
4157
4158 if (rc != KERN_SUCCESS) {
4159 /* undo what has been wired so far */
4160 vm_map_unwire(map, start, s, user_wire);
4161 }
4162
4163 return rc;
4164
4165 }
4166
4167 kern_return_t
4168 vm_map_wire(
4169 register vm_map_t map,
4170 register vm_map_offset_t start,
4171 register vm_map_offset_t end,
4172 register vm_prot_t access_type,
4173 boolean_t user_wire)
4174 {
4175
4176 kern_return_t kret;
4177
4178 kret = vm_map_wire_nested(map, start, end, access_type,
4179 user_wire, (pmap_t)NULL, 0);
4180 return kret;
4181 }
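/*
 * Illustrative usage sketch (not part of the original source): a kernel
 * subsystem that needs a user buffer resident for I/O might bracket the
 * I/O with vm_map_wire() / vm_map_unwire() roughly as below.  The helper
 * name, its parameters and the error handling are hypothetical.
 */
#if 0	/* example only, never compiled */
static kern_return_t
example_wire_user_buffer_for_io(
	vm_map_t	map,
	vm_map_offset_t	addr,
	vm_map_size_t	size)
{
	vm_map_offset_t	start = vm_map_trunc_page(addr);
	vm_map_offset_t	end = vm_map_round_page(addr + size);
	kern_return_t	kr;

	/* wire as a user wiring so user_wired_count is charged */
	kr = vm_map_wire(map, start, end,
			 VM_PROT_READ | VM_PROT_WRITE, TRUE);
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... perform the I/O against the now-resident pages ... */

	/* drop the user wiring once the I/O has completed */
	return vm_map_unwire(map, start, end, TRUE);
}
#endif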
4182
4183 /*
4184 * vm_map_unwire:
4185 *
4186 * Sets the pageability of the specified address range in the target
4187 * as pageable. Regions specified must have been wired previously.
4188 *
4189 * The map must not be locked, but a reference must remain to the map
4190 * throughout the call.
4191 *
4192 * Kernel will panic on failures. User unwire ignores holes and
4193 * unwired and in-transition entries to avoid losing memory by leaving
4194 * it unwired.
4195 */
4196 static kern_return_t
4197 vm_map_unwire_nested(
4198 register vm_map_t map,
4199 register vm_map_offset_t start,
4200 register vm_map_offset_t end,
4201 boolean_t user_wire,
4202 pmap_t map_pmap,
4203 vm_map_offset_t pmap_addr)
4204 {
4205 register vm_map_entry_t entry;
4206 struct vm_map_entry *first_entry, tmp_entry;
4207 boolean_t need_wakeup;
4208 boolean_t main_map = FALSE;
4209 unsigned int last_timestamp;
4210
4211 vm_map_lock(map);
4212 if(map_pmap == NULL)
4213 main_map = TRUE;
4214 last_timestamp = map->timestamp;
4215
4216 VM_MAP_RANGE_CHECK(map, start, end);
4217 assert(page_aligned(start));
4218 assert(page_aligned(end));
4219
4220 if (start == end) {
4221 /* We unwired what the caller asked for: zero pages */
4222 vm_map_unlock(map);
4223 return KERN_SUCCESS;
4224 }
4225
4226 if (vm_map_lookup_entry(map, start, &first_entry)) {
4227 entry = first_entry;
4228 /*
4229 * vm_map_clip_start will be done later.
4230 * We don't want to unnest any nested sub maps here !
4231 */
4232 }
4233 else {
4234 if (!user_wire) {
4235 panic("vm_map_unwire: start not found");
4236 }
4237 /* Start address is not in map. */
4238 vm_map_unlock(map);
4239 return(KERN_INVALID_ADDRESS);
4240 }
4241
4242 if (entry->superpage_size) {
4243 /* superpages are always wired */
4244 vm_map_unlock(map);
4245 return KERN_INVALID_ADDRESS;
4246 }
4247
4248 need_wakeup = FALSE;
4249 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4250 if (entry->in_transition) {
4251 /*
4252 * 1)
4253 * Another thread is wiring down this entry. Note
4254 * that were it not for the other thread we would
4255 * be unwiring an unwired entry. This is not
4256 * permitted. If we wait, we will be unwiring memory
4257 * we did not wire.
4258 *
4259 * 2)
4260 * Another thread is unwiring this entry. We did not
4261 * have a reference to it, because if we did, this
4262 * entry will not be getting unwired now.
4263 */
4264 if (!user_wire) {
4265 /*
4266 * XXX FBDP
4267 * This could happen: there could be some
4268 * overlapping vslock/vsunlock operations
4269 * going on.
4270 * We should probably just wait and retry,
4271 * but then we have to be careful that this
4272 * entry could get "simplified" after
4273 * "in_transition" gets unset and before
4274 * we re-lookup the entry, so we would
4275 * have to re-clip the entry to avoid
4276 * re-unwiring what we have already unwired...
4277 * See vm_map_wire_nested().
4278 *
4279 * Or we could just ignore "in_transition"
4280 * here and proceed to decrement the wired
4281 * count(s) on this entry. That should be fine
4282 * as long as "wired_count" doesn't drop all
4283 * the way to 0 (and we should panic if THAT
4284 * happens).
4285 */
4286 panic("vm_map_unwire: in_transition entry");
4287 }
4288
4289 entry = entry->vme_next;
4290 continue;
4291 }
4292
4293 if (entry->is_sub_map) {
4294 vm_map_offset_t sub_start;
4295 vm_map_offset_t sub_end;
4296 vm_map_offset_t local_end;
4297 pmap_t pmap;
4298
4299 vm_map_clip_start(map, entry, start);
4300 vm_map_clip_end(map, entry, end);
4301
4302 sub_start = entry->offset;
4303 sub_end = entry->vme_end - entry->vme_start;
4304 sub_end += entry->offset;
4305 local_end = entry->vme_end;
4306 if(map_pmap == NULL) {
4307 if(entry->use_pmap) {
4308 pmap = entry->object.sub_map->pmap;
4309 pmap_addr = sub_start;
4310 } else {
4311 pmap = map->pmap;
4312 pmap_addr = start;
4313 }
4314 if (entry->wired_count == 0 ||
4315 (user_wire && entry->user_wired_count == 0)) {
4316 if (!user_wire)
4317 panic("vm_map_unwire: entry is unwired");
4318 entry = entry->vme_next;
4319 continue;
4320 }
4321
4322 /*
4323 * Check for holes
4324 * Holes: Next entry should be contiguous unless
4325 * this is the end of the region.
4326 */
4327 if (((entry->vme_end < end) &&
4328 ((entry->vme_next == vm_map_to_entry(map)) ||
4329 (entry->vme_next->vme_start
4330 > entry->vme_end)))) {
4331 if (!user_wire)
4332 panic("vm_map_unwire: non-contiguous region");
4333 /*
4334 entry = entry->vme_next;
4335 continue;
4336 */
4337 }
4338
4339 subtract_wire_counts(map, entry, user_wire);
4340
4341 if (entry->wired_count != 0) {
4342 entry = entry->vme_next;
4343 continue;
4344 }
4345
4346 entry->in_transition = TRUE;
4347 tmp_entry = *entry;/* see comment in vm_map_wire() */
4348
4349 /*
4350 * We can unlock the map now. The in_transition state
4351 * guarantees existence of the entry.
4352 */
4353 vm_map_unlock(map);
4354 vm_map_unwire_nested(entry->object.sub_map,
4355 sub_start, sub_end, user_wire, pmap, pmap_addr);
4356 vm_map_lock(map);
4357
4358 if (last_timestamp+1 != map->timestamp) {
4359 /*
4360 * Find the entry again. It could have been
4361 * clipped or deleted after we unlocked the map.
4362 */
4363 if (!vm_map_lookup_entry(map,
4364 tmp_entry.vme_start,
4365 &first_entry)) {
4366 if (!user_wire)
4367 panic("vm_map_unwire: re-lookup failed");
4368 entry = first_entry->vme_next;
4369 } else
4370 entry = first_entry;
4371 }
4372 last_timestamp = map->timestamp;
4373
4374 /*
4375 * clear transition bit for all constituent entries
4376 * that were in the original entry (saved in
4377 * tmp_entry). Also check for waiters.
4378 */
4379 while ((entry != vm_map_to_entry(map)) &&
4380 (entry->vme_start < tmp_entry.vme_end)) {
4381 assert(entry->in_transition);
4382 entry->in_transition = FALSE;
4383 if (entry->needs_wakeup) {
4384 entry->needs_wakeup = FALSE;
4385 need_wakeup = TRUE;
4386 }
4387 entry = entry->vme_next;
4388 }
4389 continue;
4390 } else {
4391 vm_map_unlock(map);
4392 vm_map_unwire_nested(entry->object.sub_map,
4393 sub_start, sub_end, user_wire, map_pmap,
4394 pmap_addr);
4395 vm_map_lock(map);
4396
4397 if (last_timestamp+1 != map->timestamp) {
4398 /*
4399 * Find the entry again. It could have been
4400 * clipped or deleted after we unlocked the map.
4401 */
4402 if (!vm_map_lookup_entry(map,
4403 tmp_entry.vme_start,
4404 &first_entry)) {
4405 if (!user_wire)
4406 panic("vm_map_unwire: re-lookup failed");
4407 entry = first_entry->vme_next;
4408 } else
4409 entry = first_entry;
4410 }
4411 last_timestamp = map->timestamp;
4412 }
4413 }
4414
4415
4416 if ((entry->wired_count == 0) ||
4417 (user_wire && entry->user_wired_count == 0)) {
4418 if (!user_wire)
4419 panic("vm_map_unwire: entry is unwired");
4420
4421 entry = entry->vme_next;
4422 continue;
4423 }
4424
4425 assert(entry->wired_count > 0 &&
4426 (!user_wire || entry->user_wired_count > 0));
4427
4428 vm_map_clip_start(map, entry, start);
4429 vm_map_clip_end(map, entry, end);
4430
4431 /*
4432 * Check for holes
4433 * Holes: Next entry should be contiguous unless
4434 * this is the end of the region.
4435 */
4436 if (((entry->vme_end < end) &&
4437 ((entry->vme_next == vm_map_to_entry(map)) ||
4438 (entry->vme_next->vme_start > entry->vme_end)))) {
4439
4440 if (!user_wire)
4441 panic("vm_map_unwire: non-contiguous region");
4442 entry = entry->vme_next;
4443 continue;
4444 }
4445
4446 subtract_wire_counts(map, entry, user_wire);
4447
4448 if (entry->wired_count != 0) {
4449 entry = entry->vme_next;
4450 continue;
4451 }
4452
4453 if(entry->zero_wired_pages) {
4454 entry->zero_wired_pages = FALSE;
4455 }
4456
4457 entry->in_transition = TRUE;
4458 tmp_entry = *entry; /* see comment in vm_map_wire() */
4459
4460 /*
4461 * We can unlock the map now. The in_transition state
4462 * guarantees existence of the entry.
4463 */
4464 vm_map_unlock(map);
4465 if(map_pmap) {
4466 vm_fault_unwire(map,
4467 &tmp_entry, FALSE, map_pmap, pmap_addr);
4468 } else {
4469 vm_fault_unwire(map,
4470 &tmp_entry, FALSE, map->pmap,
4471 tmp_entry.vme_start);
4472 }
4473 vm_map_lock(map);
4474
4475 if (last_timestamp+1 != map->timestamp) {
4476 /*
4477 * Find the entry again. It could have been clipped
4478 * or deleted after we unlocked the map.
4479 */
4480 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4481 &first_entry)) {
4482 if (!user_wire)
4483 panic("vm_map_unwire: re-lookup failed");
4484 entry = first_entry->vme_next;
4485 } else
4486 entry = first_entry;
4487 }
4488 last_timestamp = map->timestamp;
4489
4490 /*
4491 * clear transition bit for all constituent entries that
4492 * were in the original entry (saved in tmp_entry). Also
4493 * check for waiters.
4494 */
4495 while ((entry != vm_map_to_entry(map)) &&
4496 (entry->vme_start < tmp_entry.vme_end)) {
4497 assert(entry->in_transition);
4498 entry->in_transition = FALSE;
4499 if (entry->needs_wakeup) {
4500 entry->needs_wakeup = FALSE;
4501 need_wakeup = TRUE;
4502 }
4503 entry = entry->vme_next;
4504 }
4505 }
4506
4507 /*
4508 * We might have fragmented the address space when we wired this
4509 * range of addresses. Attempt to re-coalesce these VM map entries
4510 * with their neighbors now that they're no longer wired.
4511 * Under some circumstances, address space fragmentation can
4512 * prevent VM object shadow chain collapsing, which can cause
4513 * swap space leaks.
4514 */
4515 vm_map_simplify_range(map, start, end);
4516
4517 vm_map_unlock(map);
4518 /*
4519 * wake up anybody waiting on entries that we have unwired.
4520 */
4521 if (need_wakeup)
4522 vm_map_entry_wakeup(map);
4523 return(KERN_SUCCESS);
4524
4525 }
4526
4527 kern_return_t
4528 vm_map_unwire(
4529 register vm_map_t map,
4530 register vm_map_offset_t start,
4531 register vm_map_offset_t end,
4532 boolean_t user_wire)
4533 {
4534 return vm_map_unwire_nested(map, start, end,
4535 user_wire, (pmap_t)NULL, 0);
4536 }
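/*
 * Illustrative contrast (hypothetical helper, not original source): a
 * kernel-internal wiring passes user_wire == FALSE, so only wired_count
 * is charged, the wait is not interruptible, and an unmatched unwire
 * panics rather than being silently ignored.
 */
#if 0	/* example only, never compiled */
static void
example_kernel_wire_and_unwire(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	/* kernel wiring: user_wire == FALSE */
	if (vm_map_wire(map, start, end, VM_PROT_READ, FALSE) == KERN_SUCCESS) {
		/* ... use the wired range ... */
		/* must be balanced by a kernel unwire (user_wire == FALSE) */
		(void) vm_map_unwire(map, start, end, FALSE);
	}
}
#endif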
4537
4538
4539 /*
4540 * vm_map_entry_delete: [ internal use only ]
4541 *
4542 * Deallocate the given entry from the target map.
4543 */
4544 static void
4545 vm_map_entry_delete(
4546 register vm_map_t map,
4547 register vm_map_entry_t entry)
4548 {
4549 register vm_map_offset_t s, e;
4550 register vm_object_t object;
4551 register vm_map_t submap;
4552
4553 s = entry->vme_start;
4554 e = entry->vme_end;
4555 assert(page_aligned(s));
4556 assert(page_aligned(e));
4557 assert(entry->wired_count == 0);
4558 assert(entry->user_wired_count == 0);
4559 assert(!entry->permanent);
4560
4561 if (entry->is_sub_map) {
4562 object = NULL;
4563 submap = entry->object.sub_map;
4564 } else {
4565 submap = NULL;
4566 object = entry->object.vm_object;
4567 }
4568
4569 vm_map_store_entry_unlink(map, entry);
4570 map->size -= e - s;
4571
4572 vm_map_entry_dispose(map, entry);
4573
4574 vm_map_unlock(map);
4575 /*
4576 * Deallocate the object only after removing all
4577 * pmap entries pointing to its pages.
4578 */
4579 if (submap)
4580 vm_map_deallocate(submap);
4581 else
4582 vm_object_deallocate(object);
4583
4584 }
4585
4586 void
4587 vm_map_submap_pmap_clean(
4588 vm_map_t map,
4589 vm_map_offset_t start,
4590 vm_map_offset_t end,
4591 vm_map_t sub_map,
4592 vm_map_offset_t offset)
4593 {
4594 vm_map_offset_t submap_start;
4595 vm_map_offset_t submap_end;
4596 vm_map_size_t remove_size;
4597 vm_map_entry_t entry;
4598
4599 submap_end = offset + (end - start);
4600 submap_start = offset;
4601
4602 vm_map_lock_read(sub_map);
4603 if(vm_map_lookup_entry(sub_map, offset, &entry)) {
4604
4605 remove_size = (entry->vme_end - entry->vme_start);
4606 if(offset > entry->vme_start)
4607 remove_size -= offset - entry->vme_start;
4608
4609
4610 if(submap_end < entry->vme_end) {
4611 remove_size -=
4612 entry->vme_end - submap_end;
4613 }
4614 if(entry->is_sub_map) {
4615 vm_map_submap_pmap_clean(
4616 sub_map,
4617 start,
4618 start + remove_size,
4619 entry->object.sub_map,
4620 entry->offset);
4621 } else {
4622
4623 if((map->mapped) && (map->ref_count)
4624 && (entry->object.vm_object != NULL)) {
4625 vm_object_pmap_protect(
4626 entry->object.vm_object,
4627 entry->offset+(offset-entry->vme_start),
4628 remove_size,
4629 PMAP_NULL,
4630 entry->vme_start,
4631 VM_PROT_NONE);
4632 } else {
4633 pmap_remove(map->pmap,
4634 (addr64_t)start,
4635 (addr64_t)(start + remove_size));
4636 }
4637 }
4638 }
4639
4640 entry = entry->vme_next;
4641
4642 while((entry != vm_map_to_entry(sub_map))
4643 && (entry->vme_start < submap_end)) {
4644 remove_size = (entry->vme_end - entry->vme_start);
4645 if(submap_end < entry->vme_end) {
4646 remove_size -= entry->vme_end - submap_end;
4647 }
4648 if(entry->is_sub_map) {
4649 vm_map_submap_pmap_clean(
4650 sub_map,
4651 (start + entry->vme_start) - offset,
4652 ((start + entry->vme_start) - offset) + remove_size,
4653 entry->object.sub_map,
4654 entry->offset);
4655 } else {
4656 if((map->mapped) && (map->ref_count)
4657 && (entry->object.vm_object != NULL)) {
4658 vm_object_pmap_protect(
4659 entry->object.vm_object,
4660 entry->offset,
4661 remove_size,
4662 PMAP_NULL,
4663 entry->vme_start,
4664 VM_PROT_NONE);
4665 } else {
4666 pmap_remove(map->pmap,
4667 (addr64_t)((start + entry->vme_start)
4668 - offset),
4669 (addr64_t)(((start + entry->vme_start)
4670 - offset) + remove_size));
4671 }
4672 }
4673 entry = entry->vme_next;
4674 }
4675 vm_map_unlock_read(sub_map);
4676 return;
4677 }
4678
4679 /*
4680 * vm_map_delete: [ internal use only ]
4681 *
4682 * Deallocates the given address range from the target map.
4683 * Removes all user wirings. Unwires one kernel wiring if
4684 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
4685 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
4686 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
4687 *
4688 * This routine is called with map locked and leaves map locked.
4689 */
4690 static kern_return_t
4691 vm_map_delete(
4692 vm_map_t map,
4693 vm_map_offset_t start,
4694 vm_map_offset_t end,
4695 int flags,
4696 vm_map_t zap_map)
4697 {
4698 vm_map_entry_t entry, next;
4699 struct vm_map_entry *first_entry, tmp_entry;
4700 register vm_map_offset_t s;
4701 register vm_object_t object;
4702 boolean_t need_wakeup;
4703 unsigned int last_timestamp = ~0; /* unlikely value */
4704 int interruptible;
4705
4706 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
4707 THREAD_ABORTSAFE : THREAD_UNINT;
4708
4709 /*
4710 * All our DMA I/O operations in IOKit are currently done by
4711 * wiring through the map entries of the task requesting the I/O.
4712 * Because of this, we must always wait for kernel wirings
4713 * to go away on the entries before deleting them.
4714 *
4715 * Any caller who wants to actually remove a kernel wiring
4716 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
4717 * properly remove one wiring instead of blasting through
4718 * them all.
4719 */
4720 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
4721
4722 while(1) {
4723 /*
4724 * Find the start of the region, and clip it
4725 */
4726 if (vm_map_lookup_entry(map, start, &first_entry)) {
4727 entry = first_entry;
4728 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */
4729 start = SUPERPAGE_ROUND_DOWN(start);
4730 continue;
4731 }
4732 if (start == entry->vme_start) {
4733 /*
4734 * No need to clip. We don't want to cause
4735 * any unnecessary unnesting in this case...
4736 */
4737 } else {
4738 vm_map_clip_start(map, entry, start);
4739 }
4740
4741 /*
4742 * Fix the lookup hint now, rather than each
4743 * time through the loop.
4744 */
4745 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4746 } else {
4747 entry = first_entry->vme_next;
4748 }
4749 break;
4750 }
4751 if (entry->superpage_size)
4752 end = SUPERPAGE_ROUND_UP(end);
4753
4754 need_wakeup = FALSE;
4755 /*
4756 * Step through all entries in this region
4757 */
4758 s = entry->vme_start;
4759 while ((entry != vm_map_to_entry(map)) && (s < end)) {
4760 /*
4761 * At this point, we have deleted all the memory entries
4762 * between "start" and "s". We still need to delete
4763 * all memory entries between "s" and "end".
4764 * While we were blocked and the map was unlocked, some
4765 * new memory entries could have been re-allocated between
4766 * "start" and "s" and we don't want to mess with those.
4767 * Some of those entries could even have been re-assembled
4768 * with an entry after "s" (in vm_map_simplify_entry()), so
4769 * we may have to vm_map_clip_start() again.
4770 */
4771
4772 if (entry->vme_start >= s) {
4773 /*
4774 * This entry starts on or after "s"
4775 * so no need to clip its start.
4776 */
4777 } else {
4778 /*
4779 * This entry has been re-assembled by a
4780 * vm_map_simplify_entry(). We need to
4781 * re-clip its start.
4782 */
4783 vm_map_clip_start(map, entry, s);
4784 }
4785 if (entry->vme_end <= end) {
4786 /*
4787 * This entry is going away completely, so no need
4788 * to clip and possibly cause an unnecessary unnesting.
4789 */
4790 } else {
4791 vm_map_clip_end(map, entry, end);
4792 }
4793
4794 if (entry->permanent) {
4795 panic("attempt to remove permanent VM map entry "
4796 "%p [0x%llx:0x%llx]\n",
4797 entry, (uint64_t) s, (uint64_t) end);
4798 }
4799
4800
4801 if (entry->in_transition) {
4802 wait_result_t wait_result;
4803
4804 /*
4805 * Another thread is wiring/unwiring this entry.
4806 * Let the other thread know we are waiting.
4807 */
4808 assert(s == entry->vme_start);
4809 entry->needs_wakeup = TRUE;
4810
4811 /*
4812 * wake up anybody waiting on entries that we have
4813 * already unwired/deleted.
4814 */
4815 if (need_wakeup) {
4816 vm_map_entry_wakeup(map);
4817 need_wakeup = FALSE;
4818 }
4819
4820 wait_result = vm_map_entry_wait(map, interruptible);
4821
4822 if (interruptible &&
4823 wait_result == THREAD_INTERRUPTED) {
4824 /*
4825 * We do not clear the needs_wakeup flag,
4826 * since we cannot tell if we were the only one.
4827 */
4828 vm_map_unlock(map);
4829 return KERN_ABORTED;
4830 }
4831
4832 /*
4833 * The entry could have been clipped or it
4834 * may not exist anymore. Look it up again.
4835 */
4836 if (!vm_map_lookup_entry(map, s, &first_entry)) {
4837 assert((map != kernel_map) &&
4838 (!entry->is_sub_map));
4839 /*
4840 * User: use the next entry
4841 */
4842 entry = first_entry->vme_next;
4843 s = entry->vme_start;
4844 } else {
4845 entry = first_entry;
4846 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4847 }
4848 last_timestamp = map->timestamp;
4849 continue;
4850 } /* end in_transition */
4851
4852 if (entry->wired_count) {
4853 boolean_t user_wire;
4854
4855 user_wire = entry->user_wired_count > 0;
4856
4857 /*
4858 * Remove a kernel wiring if requested
4859 */
4860 if (flags & VM_MAP_REMOVE_KUNWIRE) {
4861 entry->wired_count--;
4862 }
4863
4864 /*
4865 * Remove all user wirings for proper accounting
4866 */
4867 if (entry->user_wired_count > 0) {
4868 while (entry->user_wired_count)
4869 subtract_wire_counts(map, entry, user_wire);
4870 }
4871
4872 if (entry->wired_count != 0) {
4873 assert(map != kernel_map);
4874 /*
4875 * Cannot continue. Typical case is when
4876 * a user thread has physical I/O pending on
4877 * this page. Either wait for the
4878 * kernel wiring to go away or return an
4879 * error.
4880 */
4881 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
4882 wait_result_t wait_result;
4883
4884 assert(s == entry->vme_start);
4885 entry->needs_wakeup = TRUE;
4886 wait_result = vm_map_entry_wait(map,
4887 interruptible);
4888
4889 if (interruptible &&
4890 wait_result == THREAD_INTERRUPTED) {
4891 /*
4892 * We do not clear the
4893 * needs_wakeup flag, since we
4894 * cannot tell if we were the
4895 * only one.
4896 */
4897 vm_map_unlock(map);
4898 return KERN_ABORTED;
4899 }
4900
4901 /*
4902 * The entry could have been clipped or
4903 * it may not exist anymore. Look it
4904 * up again.
4905 */
4906 if (!vm_map_lookup_entry(map, s,
4907 &first_entry)) {
4908 assert(map != kernel_map);
4909 /*
4910 * User: use the next entry
4911 */
4912 entry = first_entry->vme_next;
4913 s = entry->vme_start;
4914 } else {
4915 entry = first_entry;
4916 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4917 }
4918 last_timestamp = map->timestamp;
4919 continue;
4920 }
4921 else {
4922 return KERN_FAILURE;
4923 }
4924 }
4925
4926 entry->in_transition = TRUE;
4927 /*
4928 * copy current entry. see comment in vm_map_wire()
4929 */
4930 tmp_entry = *entry;
4931 assert(s == entry->vme_start);
4932
4933 /*
4934 * We can unlock the map now. The in_transition
4935 * state guarantees existence of the entry.
4936 */
4937 vm_map_unlock(map);
4938
4939 if (tmp_entry.is_sub_map) {
4940 vm_map_t sub_map;
4941 vm_map_offset_t sub_start, sub_end;
4942 pmap_t pmap;
4943 vm_map_offset_t pmap_addr;
4944
4945
4946 sub_map = tmp_entry.object.sub_map;
4947 sub_start = tmp_entry.offset;
4948 sub_end = sub_start + (tmp_entry.vme_end -
4949 tmp_entry.vme_start);
4950 if (tmp_entry.use_pmap) {
4951 pmap = sub_map->pmap;
4952 pmap_addr = tmp_entry.vme_start;
4953 } else {
4954 pmap = map->pmap;
4955 pmap_addr = tmp_entry.vme_start;
4956 }
4957 (void) vm_map_unwire_nested(sub_map,
4958 sub_start, sub_end,
4959 user_wire,
4960 pmap, pmap_addr);
4961 } else {
4962
4963 vm_fault_unwire(map, &tmp_entry,
4964 tmp_entry.object.vm_object == kernel_object,
4965 map->pmap, tmp_entry.vme_start);
4966 }
4967
4968 vm_map_lock(map);
4969
4970 if (last_timestamp+1 != map->timestamp) {
4971 /*
4972 * Find the entry again. It could have
4973 * been clipped after we unlocked the map.
4974 */
4975 if (!vm_map_lookup_entry(map, s, &first_entry)){
4976 assert((map != kernel_map) &&
4977 (!entry->is_sub_map));
4978 first_entry = first_entry->vme_next;
4979 s = first_entry->vme_start;
4980 } else {
4981 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4982 }
4983 } else {
4984 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4985 first_entry = entry;
4986 }
4987
4988 last_timestamp = map->timestamp;
4989
4990 entry = first_entry;
4991 while ((entry != vm_map_to_entry(map)) &&
4992 (entry->vme_start < tmp_entry.vme_end)) {
4993 assert(entry->in_transition);
4994 entry->in_transition = FALSE;
4995 if (entry->needs_wakeup) {
4996 entry->needs_wakeup = FALSE;
4997 need_wakeup = TRUE;
4998 }
4999 entry = entry->vme_next;
5000 }
5001 /*
5002 * We have unwired the entry(s). Go back and
5003 * delete them.
5004 */
5005 entry = first_entry;
5006 continue;
5007 }
5008
5009 /* entry is unwired */
5010 assert(entry->wired_count == 0);
5011 assert(entry->user_wired_count == 0);
5012
5013 assert(s == entry->vme_start);
5014
5015 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
5016 /*
5017 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
5018 * vm_map_delete(), some map entries might have been
5019 * transferred to a "zap_map", which doesn't have a
5020 * pmap. The original pmap has already been flushed
5021 * in the vm_map_delete() call targeting the original
5022 * map, but when we get to destroying the "zap_map",
5023 * we don't have any pmap to flush, so let's just skip
5024 * all this.
5025 */
5026 } else if (entry->is_sub_map) {
5027 if (entry->use_pmap) {
5028 #ifndef NO_NESTED_PMAP
5029 pmap_unnest(map->pmap,
5030 (addr64_t)entry->vme_start,
5031 entry->vme_end - entry->vme_start);
5032 #endif /* NO_NESTED_PMAP */
5033 if ((map->mapped) && (map->ref_count)) {
5034 /* clean up parent map/maps */
5035 vm_map_submap_pmap_clean(
5036 map, entry->vme_start,
5037 entry->vme_end,
5038 entry->object.sub_map,
5039 entry->offset);
5040 }
5041 } else {
5042 vm_map_submap_pmap_clean(
5043 map, entry->vme_start, entry->vme_end,
5044 entry->object.sub_map,
5045 entry->offset);
5046 }
5047 } else if (entry->object.vm_object != kernel_object) {
5048 object = entry->object.vm_object;
5049 if((map->mapped) && (map->ref_count)) {
5050 vm_object_pmap_protect(
5051 object, entry->offset,
5052 entry->vme_end - entry->vme_start,
5053 PMAP_NULL,
5054 entry->vme_start,
5055 VM_PROT_NONE);
5056 } else {
5057 pmap_remove(map->pmap,
5058 (addr64_t)entry->vme_start,
5059 (addr64_t)entry->vme_end);
5060 }
5061 }
5062
5063 /*
5064 * All pmap mappings for this map entry must have been
5065 * cleared by now.
5066 */
5067 assert(vm_map_pmap_is_empty(map,
5068 entry->vme_start,
5069 entry->vme_end));
5070
5071 next = entry->vme_next;
5072 s = next->vme_start;
5073 last_timestamp = map->timestamp;
5074
5075 if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
5076 zap_map != VM_MAP_NULL) {
5077 vm_map_size_t entry_size;
5078 /*
5079 * The caller wants to save the affected VM map entries
5080 * into the "zap_map". The caller will take care of
5081 * these entries.
5082 */
5083 /* unlink the entry from "map" ... */
5084 vm_map_store_entry_unlink(map, entry);
5085 /* ... and add it to the end of the "zap_map" */
5086 vm_map_store_entry_link(zap_map,
5087 vm_map_last_entry(zap_map),
5088 entry);
5089 entry_size = entry->vme_end - entry->vme_start;
5090 map->size -= entry_size;
5091 zap_map->size += entry_size;
5092 /* we didn't unlock the map, so no timestamp increase */
5093 last_timestamp--;
5094 } else {
5095 vm_map_entry_delete(map, entry);
5096 /* vm_map_entry_delete unlocks the map */
5097 vm_map_lock(map);
5098 }
5099
5100 entry = next;
5101
5102 if(entry == vm_map_to_entry(map)) {
5103 break;
5104 }
5105 if (last_timestamp+1 != map->timestamp) {
5106 /*
5107 * we are responsible for deleting everything
5108 * from the given space; if someone has interfered,
5109 * we pick up where we left off. Back-fills should
5110 * be all right for anyone except map_delete, and
5111 * we have to assume that the task has been fully
5112 * disabled before we get here
5113 */
5114 if (!vm_map_lookup_entry(map, s, &entry)){
5115 entry = entry->vme_next;
5116 s = entry->vme_start;
5117 } else {
5118 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5119 }
5120 /*
5121 * others can not only allocate behind us, we can
5122 * also see coalescing while we don't hold the map lock
5123 */
5124 if(entry == vm_map_to_entry(map)) {
5125 break;
5126 }
5127 }
5128 last_timestamp = map->timestamp;
5129 }
5130
5131 if (map->wait_for_space)
5132 thread_wakeup((event_t) map);
5133 /*
5134 * wake up anybody waiting on entries that we have already deleted.
5135 */
5136 if (need_wakeup)
5137 vm_map_entry_wakeup(map);
5138
5139 return KERN_SUCCESS;
5140 }
5141
5142 /*
5143 * vm_map_remove:
5144 *
5145 * Remove the given address range from the target map.
5146 * This is the exported form of vm_map_delete.
5147 */
5148 kern_return_t
5149 vm_map_remove(
5150 register vm_map_t map,
5151 register vm_map_offset_t start,
5152 register vm_map_offset_t end,
5153 register boolean_t flags)
5154 {
5155 register kern_return_t result;
5156
5157 vm_map_lock(map);
5158 VM_MAP_RANGE_CHECK(map, start, end);
5159 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
5160 vm_map_unlock(map);
5161
5162 return(result);
5163 }
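/*
 * Illustrative usage sketch (hypothetical helper, not original source):
 * tearing down a kernel-wired mapping typically passes
 * VM_MAP_REMOVE_KUNWIRE so that vm_map_delete() drops the single kernel
 * wiring instead of waiting for it to go away.
 */
#if 0	/* example only, never compiled */
static void
example_remove_wired_mapping(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	kern_return_t	kr;

	/* remove the range and drop one kernel wiring on its entries */
	kr = vm_map_remove(map, start, end, VM_MAP_REMOVE_KUNWIRE);
	assert(kr == KERN_SUCCESS);
}
#endif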
5164
5165
5166 /*
5167 * Routine: vm_map_copy_discard
5168 *
5169 * Description:
5170 * Dispose of a map copy object (returned by
5171 * vm_map_copyin).
5172 */
5173 void
5174 vm_map_copy_discard(
5175 vm_map_copy_t copy)
5176 {
5177 if (copy == VM_MAP_COPY_NULL)
5178 return;
5179
5180 switch (copy->type) {
5181 case VM_MAP_COPY_ENTRY_LIST:
5182 while (vm_map_copy_first_entry(copy) !=
5183 vm_map_copy_to_entry(copy)) {
5184 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
5185
5186 vm_map_copy_entry_unlink(copy, entry);
5187 vm_object_deallocate(entry->object.vm_object);
5188 vm_map_copy_entry_dispose(copy, entry);
5189 }
5190 break;
5191 case VM_MAP_COPY_OBJECT:
5192 vm_object_deallocate(copy->cpy_object);
5193 break;
5194 case VM_MAP_COPY_KERNEL_BUFFER:
5195
5196 /*
5197 * The vm_map_copy_t and possibly the data buffer were
5198 * allocated by a single call to kalloc(), i.e. the
5199 * vm_map_copy_t was not allocated out of the zone.
5200 */
5201 kfree(copy, copy->cpy_kalloc_size);
5202 return;
5203 }
5204 zfree(vm_map_copy_zone, copy);
5205 }
5206
5207 /*
5208 * Routine: vm_map_copy_copy
5209 *
5210 * Description:
5211 * Move the information in a map copy object to
5212 * a new map copy object, leaving the old one
5213 * empty.
5214 *
5215 * This is used by kernel routines that need
5216 * to look at out-of-line data (in copyin form)
5217 * before deciding whether to return SUCCESS.
5218 * If the routine returns FAILURE, the original
5219 * copy object will be deallocated; therefore,
5220 * these routines must make a copy of the copy
5221 * object and leave the original empty so that
5222 * deallocation will not fail.
5223 */
5224 vm_map_copy_t
5225 vm_map_copy_copy(
5226 vm_map_copy_t copy)
5227 {
5228 vm_map_copy_t new_copy;
5229
5230 if (copy == VM_MAP_COPY_NULL)
5231 return VM_MAP_COPY_NULL;
5232
5233 /*
5234 * Allocate a new copy object, and copy the information
5235 * from the old one into it.
5236 */
5237
5238 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
5239 *new_copy = *copy;
5240
5241 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
5242 /*
5243 * The links in the entry chain must be
5244 * changed to point to the new copy object.
5245 */
5246 vm_map_copy_first_entry(copy)->vme_prev
5247 = vm_map_copy_to_entry(new_copy);
5248 vm_map_copy_last_entry(copy)->vme_next
5249 = vm_map_copy_to_entry(new_copy);
5250 }
5251
5252 /*
5253 * Change the old copy object into one that contains
5254 * nothing to be deallocated.
5255 */
5256 copy->type = VM_MAP_COPY_OBJECT;
5257 copy->cpy_object = VM_OBJECT_NULL;
5258
5259 /*
5260 * Return the new object.
5261 */
5262 return new_copy;
5263 }
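/*
 * Illustrative sketch of the pattern described above (hypothetical
 * routine and validity check, not original source): a kernel routine
 * that must inspect out-of-line data before committing duplicates the
 * copy object first, so that a later failure path can still discard the
 * original safely.
 */
#if 0	/* example only, never compiled */
static kern_return_t
example_inspect_then_consume(vm_map_copy_t copy)
{
	vm_map_copy_t	working_copy;

	working_copy = vm_map_copy_copy(copy);	/* original is now empty */

	if (!example_data_looks_valid(working_copy)) {	/* hypothetical check */
		vm_map_copy_discard(working_copy);
		return KERN_FAILURE;	/* caller may still discard "copy" */
	}

	/* ... hand "working_copy" off (e.g. to vm_map_copyout) on success ... */
	return KERN_SUCCESS;
}
#endif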
5264
5265 static kern_return_t
5266 vm_map_overwrite_submap_recurse(
5267 vm_map_t dst_map,
5268 vm_map_offset_t dst_addr,
5269 vm_map_size_t dst_size)
5270 {
5271 vm_map_offset_t dst_end;
5272 vm_map_entry_t tmp_entry;
5273 vm_map_entry_t entry;
5274 kern_return_t result;
5275 boolean_t encountered_sub_map = FALSE;
5276
5277
5278
5279 /*
5280 * Verify that the destination is all writeable
5281 * initially. We have to trunc the destination
5282 * address and round the copy size or we'll end up
5283 * splitting entries in strange ways.
5284 */
5285
5286 dst_end = vm_map_round_page(dst_addr + dst_size);
5287 vm_map_lock(dst_map);
5288
5289 start_pass_1:
5290 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5291 vm_map_unlock(dst_map);
5292 return(KERN_INVALID_ADDRESS);
5293 }
5294
5295 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
5296 assert(!tmp_entry->use_pmap); /* clipping did unnest if needed */
5297
5298 for (entry = tmp_entry;;) {
5299 vm_map_entry_t next;
5300
5301 next = entry->vme_next;
5302 while(entry->is_sub_map) {
5303 vm_map_offset_t sub_start;
5304 vm_map_offset_t sub_end;
5305 vm_map_offset_t local_end;
5306
5307 if (entry->in_transition) {
5308 /*
5309 * Say that we are waiting, and wait for entry.
5310 */
5311 entry->needs_wakeup = TRUE;
5312 vm_map_entry_wait(dst_map, THREAD_UNINT);
5313
5314 goto start_pass_1;
5315 }
5316
5317 encountered_sub_map = TRUE;
5318 sub_start = entry->offset;
5319
5320 if(entry->vme_end < dst_end)
5321 sub_end = entry->vme_end;
5322 else
5323 sub_end = dst_end;
5324 sub_end -= entry->vme_start;
5325 sub_end += entry->offset;
5326 local_end = entry->vme_end;
5327 vm_map_unlock(dst_map);
5328
5329 result = vm_map_overwrite_submap_recurse(
5330 entry->object.sub_map,
5331 sub_start,
5332 sub_end - sub_start);
5333
5334 if(result != KERN_SUCCESS)
5335 return result;
5336 if (dst_end <= entry->vme_end)
5337 return KERN_SUCCESS;
5338 vm_map_lock(dst_map);
5339 if(!vm_map_lookup_entry(dst_map, local_end,
5340 &tmp_entry)) {
5341 vm_map_unlock(dst_map);
5342 return(KERN_INVALID_ADDRESS);
5343 }
5344 entry = tmp_entry;
5345 next = entry->vme_next;
5346 }
5347
5348 if ( ! (entry->protection & VM_PROT_WRITE)) {
5349 vm_map_unlock(dst_map);
5350 return(KERN_PROTECTION_FAILURE);
5351 }
5352
5353 /*
5354 * If the entry is in transition, we must wait
5355 * for it to exit that state. Anything could happen
5356 * when we unlock the map, so start over.
5357 */
5358 if (entry->in_transition) {
5359
5360 /*
5361 * Say that we are waiting, and wait for entry.
5362 */
5363 entry->needs_wakeup = TRUE;
5364 vm_map_entry_wait(dst_map, THREAD_UNINT);
5365
5366 goto start_pass_1;
5367 }
5368
5369 /*
5370 * our range is contained completely within this map entry
5371 */
5372 if (dst_end <= entry->vme_end) {
5373 vm_map_unlock(dst_map);
5374 return KERN_SUCCESS;
5375 }
5376 /*
5377 * check that range specified is contiguous region
5378 */
5379 if ((next == vm_map_to_entry(dst_map)) ||
5380 (next->vme_start != entry->vme_end)) {
5381 vm_map_unlock(dst_map);
5382 return(KERN_INVALID_ADDRESS);
5383 }
5384
5385 /*
5386 * Check for permanent objects in the destination.
5387 */
5388 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5389 ((!entry->object.vm_object->internal) ||
5390 (entry->object.vm_object->true_share))) {
5391 if(encountered_sub_map) {
5392 vm_map_unlock(dst_map);
5393 return(KERN_FAILURE);
5394 }
5395 }
5396
5397
5398 entry = next;
5399 }/* for */
5400 vm_map_unlock(dst_map);
5401 return(KERN_SUCCESS);
5402 }
5403
5404 /*
5405 * Routine: vm_map_copy_overwrite
5406 *
5407 * Description:
5408 * Copy the memory described by the map copy
5409 * object (copy; returned by vm_map_copyin) onto
5410 * the specified destination region (dst_map, dst_addr).
5411 * The destination must be writeable.
5412 *
5413 * Unlike vm_map_copyout, this routine actually
5414 * writes over previously-mapped memory. If the
5415 * previous mapping was to a permanent (user-supplied)
5416 * memory object, it is preserved.
5417 *
5418 * The attributes (protection and inheritance) of the
5419 * destination region are preserved.
5420 *
5421 * If successful, consumes the copy object.
5422 * Otherwise, the caller is responsible for it.
5423 *
5424 * Implementation notes:
5425 * To overwrite aligned temporary virtual memory, it is
5426 * sufficient to remove the previous mapping and insert
5427 * the new copy. This replacement is done either on
5428 * the whole region (if no permanent virtual memory
5429 * objects are embedded in the destination region) or
5430 * in individual map entries.
5431 *
5432 * To overwrite permanent virtual memory, it is necessary
5433 * to copy each page, as the external memory management
5434 * interface currently does not provide any optimizations.
5435 *
5436 * Unaligned memory also has to be copied. It is possible
5437 * to use 'vm_trickery' to copy the aligned data. This is
5438 * not done but not hard to implement.
5439 *
5440 * Once a page of permanent memory has been overwritten,
5441 * it is impossible to interrupt this function; otherwise,
5442 * the call would be neither atomic nor location-independent.
5443 * The kernel-state portion of a user thread must be
5444 * interruptible.
5445 *
5446 * It may be expensive to forward all requests that might
5447 * overwrite permanent memory (vm_write, vm_copy) to
5448 * uninterruptible kernel threads. This routine may be
5449 * called by interruptible threads; however, success is
5450 * not guaranteed -- if the request cannot be performed
5451 * atomically and interruptibly, an error indication is
5452 * returned.
5453 */
5454
5455 static kern_return_t
5456 vm_map_copy_overwrite_nested(
5457 vm_map_t dst_map,
5458 vm_map_address_t dst_addr,
5459 vm_map_copy_t copy,
5460 boolean_t interruptible,
5461 pmap_t pmap,
5462 boolean_t discard_on_success)
5463 {
5464 vm_map_offset_t dst_end;
5465 vm_map_entry_t tmp_entry;
5466 vm_map_entry_t entry;
5467 kern_return_t kr;
5468 boolean_t aligned = TRUE;
5469 boolean_t contains_permanent_objects = FALSE;
5470 boolean_t encountered_sub_map = FALSE;
5471 vm_map_offset_t base_addr;
5472 vm_map_size_t copy_size;
5473 vm_map_size_t total_size;
5474
5475
5476 /*
5477 * Check for null copy object.
5478 */
5479
5480 if (copy == VM_MAP_COPY_NULL)
5481 return(KERN_SUCCESS);
5482
5483 /*
5484 * Check for special kernel buffer allocated
5485 * by new_ipc_kmsg_copyin.
5486 */
5487
5488 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
5489 return(vm_map_copyout_kernel_buffer(
5490 dst_map, &dst_addr,
5491 copy, TRUE));
5492 }
5493
5494 /*
5495 * Only works for entry lists at the moment. Will
5496 * support page lists later.
5497 */
5498
5499 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
5500
5501 if (copy->size == 0) {
5502 if (discard_on_success)
5503 vm_map_copy_discard(copy);
5504 return(KERN_SUCCESS);
5505 }
5506
5507 /*
5508 * Verify that the destination is all writeable
5509 * initially. We have to trunc the destination
5510 * address and round the copy size or we'll end up
5511 * splitting entries in strange ways.
5512 */
5513
5514 if (!page_aligned(copy->size) ||
5515 !page_aligned (copy->offset) ||
5516 !page_aligned (dst_addr))
5517 {
5518 aligned = FALSE;
5519 dst_end = vm_map_round_page(dst_addr + copy->size);
5520 } else {
5521 dst_end = dst_addr + copy->size;
5522 }
5523
5524 vm_map_lock(dst_map);
5525
5526 /* LP64todo - remove this check when vm_map_commpage64()
5527 * no longer has to stuff in a map_entry for the commpage
5528 * above the map's max_offset.
5529 */
5530 if (dst_addr >= dst_map->max_offset) {
5531 vm_map_unlock(dst_map);
5532 return(KERN_INVALID_ADDRESS);
5533 }
5534
5535 start_pass_1:
5536 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5537 vm_map_unlock(dst_map);
5538 return(KERN_INVALID_ADDRESS);
5539 }
5540 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
5541 for (entry = tmp_entry;;) {
5542 vm_map_entry_t next = entry->vme_next;
5543
5544 while(entry->is_sub_map) {
5545 vm_map_offset_t sub_start;
5546 vm_map_offset_t sub_end;
5547 vm_map_offset_t local_end;
5548
5549 if (entry->in_transition) {
5550
5551 /*
5552 * Say that we are waiting, and wait for entry.
5553 */
5554 entry->needs_wakeup = TRUE;
5555 vm_map_entry_wait(dst_map, THREAD_UNINT);
5556
5557 goto start_pass_1;
5558 }
5559
5560 local_end = entry->vme_end;
5561 if (!(entry->needs_copy)) {
5562 /* if needs_copy we are a COW submap */
5563 /* in such a case we just replace so */
5564 /* there is no need for the */
5565 /* following check. */
5566 encountered_sub_map = TRUE;
5567 sub_start = entry->offset;
5568
5569 if(entry->vme_end < dst_end)
5570 sub_end = entry->vme_end;
5571 else
5572 sub_end = dst_end;
5573 sub_end -= entry->vme_start;
5574 sub_end += entry->offset;
5575 vm_map_unlock(dst_map);
5576
5577 kr = vm_map_overwrite_submap_recurse(
5578 entry->object.sub_map,
5579 sub_start,
5580 sub_end - sub_start);
5581 if(kr != KERN_SUCCESS)
5582 return kr;
5583 vm_map_lock(dst_map);
5584 }
5585
5586 if (dst_end <= entry->vme_end)
5587 goto start_overwrite;
5588 if(!vm_map_lookup_entry(dst_map, local_end,
5589 &entry)) {
5590 vm_map_unlock(dst_map);
5591 return(KERN_INVALID_ADDRESS);
5592 }
5593 next = entry->vme_next;
5594 }
5595
5596 if ( ! (entry->protection & VM_PROT_WRITE)) {
5597 vm_map_unlock(dst_map);
5598 return(KERN_PROTECTION_FAILURE);
5599 }
5600
5601 /*
5602 * If the entry is in transition, we must wait
5603 * for it to exit that state. Anything could happen
5604 * when we unlock the map, so start over.
5605 */
5606 if (entry->in_transition) {
5607
5608 /*
5609 * Say that we are waiting, and wait for entry.
5610 */
5611 entry->needs_wakeup = TRUE;
5612 vm_map_entry_wait(dst_map, THREAD_UNINT);
5613
5614 goto start_pass_1;
5615 }
5616
5617 /*
5618 * our range is contained completely within this map entry
5619 */
5620 if (dst_end <= entry->vme_end)
5621 break;
5622 /*
5623 * check that range specified is contiguous region
5624 */
5625 if ((next == vm_map_to_entry(dst_map)) ||
5626 (next->vme_start != entry->vme_end)) {
5627 vm_map_unlock(dst_map);
5628 return(KERN_INVALID_ADDRESS);
5629 }
5630
5631
5632 /*
5633 * Check for permanent objects in the destination.
5634 */
5635 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5636 ((!entry->object.vm_object->internal) ||
5637 (entry->object.vm_object->true_share))) {
5638 contains_permanent_objects = TRUE;
5639 }
5640
5641 entry = next;
5642 }/* for */
5643
5644 start_overwrite:
5645 /*
5646 * If there are permanent objects in the destination, then
5647 * the copy cannot be interrupted.
5648 */
5649
5650 if (interruptible && contains_permanent_objects) {
5651 vm_map_unlock(dst_map);
5652 return(KERN_FAILURE); /* XXX */
5653 }
5654
5655 /*
5656 *
5657 * Make a second pass, overwriting the data.
5658 * At the beginning of each loop iteration,
5659 * the next entry to be overwritten is "tmp_entry"
5660 * (initially, the value returned from the lookup above),
5661 * and the starting address expected in that entry
5662 * is "start".
5663 */
5664
5665 total_size = copy->size;
5666 if(encountered_sub_map) {
5667 copy_size = 0;
5668 /* re-calculate tmp_entry since we've had the map */
5669 /* unlocked */
5670 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
5671 vm_map_unlock(dst_map);
5672 return(KERN_INVALID_ADDRESS);
5673 }
5674 } else {
5675 copy_size = copy->size;
5676 }
5677
5678 base_addr = dst_addr;
5679 while(TRUE) {
5680 /* deconstruct the copy object and do in parts */
5681 /* only in sub_map, interruptible case */
5682 vm_map_entry_t copy_entry;
5683 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
5684 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
5685 int nentries;
5686 int remaining_entries = 0;
5687 vm_map_offset_t new_offset = 0;
5688
5689 for (entry = tmp_entry; copy_size == 0;) {
5690 vm_map_entry_t next;
5691
5692 next = entry->vme_next;
5693
5694 /* tmp_entry and base address are moved along */
5695 /* each time we encounter a sub-map. Otherwise */
5696 /* entry can outpace tmp_entry, and the copy_size */
5697 /* may reflect the distance between them */
5698 /* If the current entry is found to be in transition, */
5699 /* we will start over at the beginning or at the last */
5700 /* encounter of a submap, as dictated by base_addr, */
5701 /* and we will zero copy_size accordingly. */
5702 if (entry->in_transition) {
5703 /*
5704 * Say that we are waiting, and wait for entry.
5705 */
5706 entry->needs_wakeup = TRUE;
5707 vm_map_entry_wait(dst_map, THREAD_UNINT);
5708
5709 if(!vm_map_lookup_entry(dst_map, base_addr,
5710 &tmp_entry)) {
5711 vm_map_unlock(dst_map);
5712 return(KERN_INVALID_ADDRESS);
5713 }
5714 copy_size = 0;
5715 entry = tmp_entry;
5716 continue;
5717 }
5718 if(entry->is_sub_map) {
5719 vm_map_offset_t sub_start;
5720 vm_map_offset_t sub_end;
5721 vm_map_offset_t local_end;
5722
5723 if (entry->needs_copy) {
5724 /* if this is a COW submap */
5725 /* just back the range with an */
5726 /* anonymous entry */
5727 if(entry->vme_end < dst_end)
5728 sub_end = entry->vme_end;
5729 else
5730 sub_end = dst_end;
5731 if(entry->vme_start < base_addr)
5732 sub_start = base_addr;
5733 else
5734 sub_start = entry->vme_start;
5735 vm_map_clip_end(
5736 dst_map, entry, sub_end);
5737 vm_map_clip_start(
5738 dst_map, entry, sub_start);
5739 assert(!entry->use_pmap);
5740 entry->is_sub_map = FALSE;
5741 vm_map_deallocate(
5742 entry->object.sub_map);
5743 entry->object.sub_map = NULL;
5744 entry->is_shared = FALSE;
5745 entry->needs_copy = FALSE;
5746 entry->offset = 0;
5747 /*
5748 * XXX FBDP
5749 * We should propagate the protections
5750 * of the submap entry here instead
5751 * of forcing them to VM_PROT_ALL...
5752 * Or better yet, we should inherit
5753 * the protection of the copy_entry.
5754 */
5755 entry->protection = VM_PROT_ALL;
5756 entry->max_protection = VM_PROT_ALL;
5757 entry->wired_count = 0;
5758 entry->user_wired_count = 0;
5759 if(entry->inheritance
5760 == VM_INHERIT_SHARE)
5761 entry->inheritance = VM_INHERIT_COPY;
5762 continue;
5763 }
5764 /* first take care of any non-sub_map */
5765 /* entries to send */
5766 if(base_addr < entry->vme_start) {
5767 /* stuff to send */
5768 copy_size =
5769 entry->vme_start - base_addr;
5770 break;
5771 }
5772 sub_start = entry->offset;
5773
5774 if(entry->vme_end < dst_end)
5775 sub_end = entry->vme_end;
5776 else
5777 sub_end = dst_end;
5778 sub_end -= entry->vme_start;
5779 sub_end += entry->offset;
5780 local_end = entry->vme_end;
5781 vm_map_unlock(dst_map);
5782 copy_size = sub_end - sub_start;
5783
5784 /* adjust the copy object */
5785 if (total_size > copy_size) {
5786 vm_map_size_t local_size = 0;
5787 vm_map_size_t entry_size;
5788
5789 nentries = 1;
5790 new_offset = copy->offset;
5791 copy_entry = vm_map_copy_first_entry(copy);
5792 while(copy_entry !=
5793 vm_map_copy_to_entry(copy)){
5794 entry_size = copy_entry->vme_end -
5795 copy_entry->vme_start;
5796 if((local_size < copy_size) &&
5797 ((local_size + entry_size)
5798 >= copy_size)) {
5799 vm_map_copy_clip_end(copy,
5800 copy_entry,
5801 copy_entry->vme_start +
5802 (copy_size - local_size));
5803 entry_size = copy_entry->vme_end -
5804 copy_entry->vme_start;
5805 local_size += entry_size;
5806 new_offset += entry_size;
5807 }
5808 if(local_size >= copy_size) {
5809 next_copy = copy_entry->vme_next;
5810 copy_entry->vme_next =
5811 vm_map_copy_to_entry(copy);
5812 previous_prev =
5813 copy->cpy_hdr.links.prev;
5814 copy->cpy_hdr.links.prev = copy_entry;
5815 copy->size = copy_size;
5816 remaining_entries =
5817 copy->cpy_hdr.nentries;
5818 remaining_entries -= nentries;
5819 copy->cpy_hdr.nentries = nentries;
5820 break;
5821 } else {
5822 local_size += entry_size;
5823 new_offset += entry_size;
5824 nentries++;
5825 }
5826 copy_entry = copy_entry->vme_next;
5827 }
5828 }
5829
5830 if((entry->use_pmap) && (pmap == NULL)) {
5831 kr = vm_map_copy_overwrite_nested(
5832 entry->object.sub_map,
5833 sub_start,
5834 copy,
5835 interruptible,
5836 entry->object.sub_map->pmap,
5837 TRUE);
5838 } else if (pmap != NULL) {
5839 kr = vm_map_copy_overwrite_nested(
5840 entry->object.sub_map,
5841 sub_start,
5842 copy,
5843 interruptible, pmap,
5844 TRUE);
5845 } else {
5846 kr = vm_map_copy_overwrite_nested(
5847 entry->object.sub_map,
5848 sub_start,
5849 copy,
5850 interruptible,
5851 dst_map->pmap,
5852 TRUE);
5853 }
5854 if(kr != KERN_SUCCESS) {
5855 if(next_copy != NULL) {
5856 copy->cpy_hdr.nentries +=
5857 remaining_entries;
5858 copy->cpy_hdr.links.prev->vme_next =
5859 next_copy;
5860 copy->cpy_hdr.links.prev
5861 = previous_prev;
5862 copy->size = total_size;
5863 }
5864 return kr;
5865 }
5866 if (dst_end <= local_end) {
5867 return(KERN_SUCCESS);
5868 }
5869 /* otherwise copy no longer exists, it was */
5870 /* destroyed after successful copy_overwrite */
5871 copy = (vm_map_copy_t)
5872 zalloc(vm_map_copy_zone);
5873 vm_map_copy_first_entry(copy) =
5874 vm_map_copy_last_entry(copy) =
5875 vm_map_copy_to_entry(copy);
5876 copy->type = VM_MAP_COPY_ENTRY_LIST;
5877 copy->offset = new_offset;
5878
5879 total_size -= copy_size;
5880 copy_size = 0;
5881 /* put back remainder of copy in container */
5882 if(next_copy != NULL) {
5883 copy->cpy_hdr.nentries = remaining_entries;
5884 copy->cpy_hdr.links.next = next_copy;
5885 copy->cpy_hdr.links.prev = previous_prev;
5886 copy->size = total_size;
5887 next_copy->vme_prev =
5888 vm_map_copy_to_entry(copy);
5889 next_copy = NULL;
5890 }
5891 base_addr = local_end;
5892 vm_map_lock(dst_map);
5893 if(!vm_map_lookup_entry(dst_map,
5894 local_end, &tmp_entry)) {
5895 vm_map_unlock(dst_map);
5896 return(KERN_INVALID_ADDRESS);
5897 }
5898 entry = tmp_entry;
5899 continue;
5900 }
5901 if (dst_end <= entry->vme_end) {
5902 copy_size = dst_end - base_addr;
5903 break;
5904 }
5905
5906 if ((next == vm_map_to_entry(dst_map)) ||
5907 (next->vme_start != entry->vme_end)) {
5908 vm_map_unlock(dst_map);
5909 return(KERN_INVALID_ADDRESS);
5910 }
5911
5912 entry = next;
5913 }/* for */
5914
5915 next_copy = NULL;
5916 nentries = 1;
5917
5918 /* adjust the copy object */
5919 if (total_size > copy_size) {
5920 vm_map_size_t local_size = 0;
5921 vm_map_size_t entry_size;
5922
5923 new_offset = copy->offset;
5924 copy_entry = vm_map_copy_first_entry(copy);
5925 while(copy_entry != vm_map_copy_to_entry(copy)) {
5926 entry_size = copy_entry->vme_end -
5927 copy_entry->vme_start;
5928 if((local_size < copy_size) &&
5929 ((local_size + entry_size)
5930 >= copy_size)) {
5931 vm_map_copy_clip_end(copy, copy_entry,
5932 copy_entry->vme_start +
5933 (copy_size - local_size));
5934 entry_size = copy_entry->vme_end -
5935 copy_entry->vme_start;
5936 local_size += entry_size;
5937 new_offset += entry_size;
5938 }
5939 if(local_size >= copy_size) {
5940 next_copy = copy_entry->vme_next;
5941 copy_entry->vme_next =
5942 vm_map_copy_to_entry(copy);
5943 previous_prev =
5944 copy->cpy_hdr.links.prev;
5945 copy->cpy_hdr.links.prev = copy_entry;
5946 copy->size = copy_size;
5947 remaining_entries =
5948 copy->cpy_hdr.nentries;
5949 remaining_entries -= nentries;
5950 copy->cpy_hdr.nentries = nentries;
5951 break;
5952 } else {
5953 local_size += entry_size;
5954 new_offset += entry_size;
5955 nentries++;
5956 }
5957 copy_entry = copy_entry->vme_next;
5958 }
5959 }
5960
5961 if (aligned) {
5962 pmap_t local_pmap;
5963
5964 if(pmap)
5965 local_pmap = pmap;
5966 else
5967 local_pmap = dst_map->pmap;
5968
5969 if ((kr = vm_map_copy_overwrite_aligned(
5970 dst_map, tmp_entry, copy,
5971 base_addr, local_pmap)) != KERN_SUCCESS) {
5972 if(next_copy != NULL) {
5973 copy->cpy_hdr.nentries +=
5974 remaining_entries;
5975 copy->cpy_hdr.links.prev->vme_next =
5976 next_copy;
5977 copy->cpy_hdr.links.prev =
5978 previous_prev;
5979 copy->size += copy_size;
5980 }
5981 return kr;
5982 }
5983 vm_map_unlock(dst_map);
5984 } else {
5985 /*
5986 * Performance gain:
5987 *
5988 * if the copy and dst address are misaligned but the same
5989 * offset within the page we can copy_not_aligned the
5990 * misaligned parts and copy aligned the rest. If they are
5991 * aligned but len is unaligned we simply need to copy
5992 * the end bit unaligned. We'll need to split the misaligned
5993 * bits of the region in this case !
5994 */
5995 /* ALWAYS UNLOCKS THE dst_map MAP */
5996 if ((kr = vm_map_copy_overwrite_unaligned( dst_map,
5997 tmp_entry, copy, base_addr)) != KERN_SUCCESS) {
5998 if(next_copy != NULL) {
5999 copy->cpy_hdr.nentries +=
6000 remaining_entries;
6001 copy->cpy_hdr.links.prev->vme_next =
6002 next_copy;
6003 copy->cpy_hdr.links.prev =
6004 previous_prev;
6005 copy->size += copy_size;
6006 }
6007 return kr;
6008 }
6009 }
6010 total_size -= copy_size;
6011 if(total_size == 0)
6012 break;
6013 base_addr += copy_size;
6014 copy_size = 0;
6015 copy->offset = new_offset;
6016 if(next_copy != NULL) {
6017 copy->cpy_hdr.nentries = remaining_entries;
6018 copy->cpy_hdr.links.next = next_copy;
6019 copy->cpy_hdr.links.prev = previous_prev;
6020 next_copy->vme_prev = vm_map_copy_to_entry(copy);
6021 copy->size = total_size;
6022 }
6023 vm_map_lock(dst_map);
6024 while(TRUE) {
6025 if (!vm_map_lookup_entry(dst_map,
6026 base_addr, &tmp_entry)) {
6027 vm_map_unlock(dst_map);
6028 return(KERN_INVALID_ADDRESS);
6029 }
6030 if (tmp_entry->in_transition) {
6031 entry->needs_wakeup = TRUE;
6032 vm_map_entry_wait(dst_map, THREAD_UNINT);
6033 } else {
6034 break;
6035 }
6036 }
6037 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(base_addr));
6038
6039 entry = tmp_entry;
6040 } /* while */
6041
6042 /*
6043 * Throw away the vm_map_copy object
6044 */
6045 if (discard_on_success)
6046 vm_map_copy_discard(copy);
6047
6048 return(KERN_SUCCESS);
6049 }/* vm_map_copy_overwrite_nested */
6050
6051 kern_return_t
6052 vm_map_copy_overwrite(
6053 vm_map_t dst_map,
6054 vm_map_offset_t dst_addr,
6055 vm_map_copy_t copy,
6056 boolean_t interruptible)
6057 {
6058 vm_map_size_t head_size, tail_size;
6059 vm_map_copy_t head_copy, tail_copy;
6060 vm_map_offset_t head_addr, tail_addr;
6061 vm_map_entry_t entry;
6062 kern_return_t kr;
6063
6064 head_size = 0;
6065 tail_size = 0;
6066 head_copy = NULL;
6067 tail_copy = NULL;
6068 head_addr = 0;
6069 tail_addr = 0;
6070
6071 if (interruptible ||
6072 copy == VM_MAP_COPY_NULL ||
6073 copy->type != VM_MAP_COPY_ENTRY_LIST) {
6074 /*
6075 * We can't split the "copy" map if we're interruptible
6076 * or if we don't have a "copy" map...
6077 */
6078 blunt_copy:
6079 return vm_map_copy_overwrite_nested(dst_map,
6080 dst_addr,
6081 copy,
6082 interruptible,
6083 (pmap_t) NULL,
6084 TRUE);
6085 }
6086
6087 if (copy->size < 3 * PAGE_SIZE) {
6088 /*
6089 * Too small to bother with optimizing...
6090 */
6091 goto blunt_copy;
6092 }
6093
6094 if ((dst_addr & PAGE_MASK) != (copy->offset & PAGE_MASK)) {
6095 /*
6096 * Incompatible mis-alignment of source and destination...
6097 */
6098 goto blunt_copy;
6099 }
6100
6101 /*
6102 * Proper alignment or identical mis-alignment at the beginning.
6103 * Let's try and do a small unaligned copy first (if needed)
6104 * and then an aligned copy for the rest.
6105 */
6106 if (!page_aligned(dst_addr)) {
6107 head_addr = dst_addr;
6108 head_size = PAGE_SIZE - (copy->offset & PAGE_MASK);
6109 }
6110 if (!page_aligned(copy->offset + copy->size)) {
6111 /*
6112 * Mis-alignment at the end.
6113 * Do an aligned copy up to the last page and
6114 * then an unaligned copy for the remaining bytes.
6115 */
6116 tail_size = (copy->offset + copy->size) & PAGE_MASK;
6117 tail_addr = dst_addr + copy->size - tail_size;
6118 }
6119
6120 if (head_size + tail_size == copy->size) {
6121 /*
6122 * It's all unaligned, no optimization possible...
6123 */
6124 goto blunt_copy;
6125 }
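/*
 * Illustrative worked example (not part of the original source; assumes
 * PAGE_SIZE == 0x1000):
 *
 *     copy->offset == 0x3200, copy->size == 0x3a00, dst_addr == 0x7200
 *
 * Source and destination are both offset 0x200 into their pages, so the
 * optimization applies:
 *
 *     head_addr = 0x7200, head_size = 0x1000 - 0x200 = 0x0e00
 *     tail_size = (0x3200 + 0x3a00) & PAGE_MASK = 0x0c00
 *     tail_addr = 0x7200 + 0x3a00 - 0x0c00 = 0xa000
 *
 * The head [0x7200, 0x8000) and tail [0xa000, 0xac00) are copied
 * unaligned, while the two middle pages [0x8000, 0xa000) go through
 * the aligned path.
 */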
6126
6127 /*
6128 * Can't optimize if there are any submaps in the
6129 * destination due to the way we free the "copy" map
6130 * progressively in vm_map_copy_overwrite_nested()
6131 * in that case.
6132 */
6133 vm_map_lock_read(dst_map);
6134 if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
6135 vm_map_unlock_read(dst_map);
6136 goto blunt_copy;
6137 }
6138 for (;
6139 (entry != vm_map_copy_to_entry(copy) &&
6140 entry->vme_start < dst_addr + copy->size);
6141 entry = entry->vme_next) {
6142 if (entry->is_sub_map) {
6143 vm_map_unlock_read(dst_map);
6144 goto blunt_copy;
6145 }
6146 }
6147 vm_map_unlock_read(dst_map);
6148
6149 if (head_size) {
6150 /*
6151 * Unaligned copy of the first "head_size" bytes, to reach
6152 * a page boundary.
6153 */
6154
6155 /*
6156 * Extract "head_copy" out of "copy".
6157 */
6158 head_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6159 vm_map_copy_first_entry(head_copy) =
6160 vm_map_copy_to_entry(head_copy);
6161 vm_map_copy_last_entry(head_copy) =
6162 vm_map_copy_to_entry(head_copy);
6163 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
6164 head_copy->cpy_hdr.nentries = 0;
6165 head_copy->cpy_hdr.entries_pageable =
6166 copy->cpy_hdr.entries_pageable;
6167 vm_map_store_init(&head_copy->cpy_hdr);
6168
6169 head_copy->offset = copy->offset;
6170 head_copy->size = head_size;
6171
6172 copy->offset += head_size;
6173 copy->size -= head_size;
6174
6175 entry = vm_map_copy_first_entry(copy);
6176 vm_map_copy_clip_end(copy, entry, copy->offset);
6177 vm_map_copy_entry_unlink(copy, entry);
6178 vm_map_copy_entry_link(head_copy,
6179 vm_map_copy_to_entry(head_copy),
6180 entry);
6181
6182 /*
6183 * Do the unaligned copy.
6184 */
6185 kr = vm_map_copy_overwrite_nested(dst_map,
6186 head_addr,
6187 head_copy,
6188 interruptible,
6189 (pmap_t) NULL,
6190 FALSE);
6191 if (kr != KERN_SUCCESS)
6192 goto done;
6193 }
6194
6195 if (tail_size) {
6196 /*
6197 * Extract "tail_copy" out of "copy".
6198 */
6199 tail_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6200 vm_map_copy_first_entry(tail_copy) =
6201 vm_map_copy_to_entry(tail_copy);
6202 vm_map_copy_last_entry(tail_copy) =
6203 vm_map_copy_to_entry(tail_copy);
6204 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
6205 tail_copy->cpy_hdr.nentries = 0;
6206 tail_copy->cpy_hdr.entries_pageable =
6207 copy->cpy_hdr.entries_pageable;
6208 vm_map_store_init(&tail_copy->cpy_hdr);
6209
6210 tail_copy->offset = copy->offset + copy->size - tail_size;
6211 tail_copy->size = tail_size;
6212
6213 copy->size -= tail_size;
6214
6215 entry = vm_map_copy_last_entry(copy);
6216 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
6217 entry = vm_map_copy_last_entry(copy);
6218 vm_map_copy_entry_unlink(copy, entry);
6219 vm_map_copy_entry_link(tail_copy,
6220 vm_map_copy_last_entry(tail_copy),
6221 entry);
6222 }
6223
6224 /*
6225 * Copy most (or possibly all) of the data.
6226 */
6227 kr = vm_map_copy_overwrite_nested(dst_map,
6228 dst_addr + head_size,
6229 copy,
6230 interruptible,
6231 (pmap_t) NULL,
6232 FALSE);
6233 if (kr != KERN_SUCCESS) {
6234 goto done;
6235 }
6236
6237 if (tail_size) {
6238 kr = vm_map_copy_overwrite_nested(dst_map,
6239 tail_addr,
6240 tail_copy,
6241 interruptible,
6242 (pmap_t) NULL,
6243 FALSE);
6244 }
6245
6246 done:
6247 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
6248 if (kr == KERN_SUCCESS) {
6249 /*
6250 * Discard all the copy maps.
6251 */
6252 if (head_copy) {
6253 vm_map_copy_discard(head_copy);
6254 head_copy = NULL;
6255 }
6256 vm_map_copy_discard(copy);
6257 if (tail_copy) {
6258 vm_map_copy_discard(tail_copy);
6259 tail_copy = NULL;
6260 }
6261 } else {
6262 /*
6263 * Re-assemble the original copy map.
6264 */
6265 if (head_copy) {
6266 entry = vm_map_copy_first_entry(head_copy);
6267 vm_map_copy_entry_unlink(head_copy, entry);
6268 vm_map_copy_entry_link(copy,
6269 vm_map_copy_to_entry(copy),
6270 entry);
6271 copy->offset -= head_size;
6272 copy->size += head_size;
6273 vm_map_copy_discard(head_copy);
6274 head_copy = NULL;
6275 }
6276 if (tail_copy) {
6277 entry = vm_map_copy_last_entry(tail_copy);
6278 vm_map_copy_entry_unlink(tail_copy, entry);
6279 vm_map_copy_entry_link(copy,
6280 vm_map_copy_last_entry(copy),
6281 entry);
6282 copy->size += tail_size;
6283 vm_map_copy_discard(tail_copy);
6284 tail_copy = NULL;
6285 }
6286 }
6287 return kr;
6288 }
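/*
 * Illustrative caller sketch (not part of the original source): overwriting
 * an already-mapped, writable range in "dst_map" with data copied in from
 * another map. The copy object is consumed on success, so the caller only
 * discards it if the overwrite fails:
 *
 *     vm_map_copy_t   copy;
 *     kern_return_t   kr;
 *
 *     kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
 *     if (kr == KERN_SUCCESS) {
 *             kr = vm_map_copy_overwrite(dst_map, dst_addr, copy, FALSE);
 *             if (kr != KERN_SUCCESS)
 *                     vm_map_copy_discard(copy);
 *     }
 */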
6289
6290
6291 /*
6292 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
6293 *
6294 * Description:
6295 * Physically copy unaligned data
6296 *
6297 * Implementation:
6298 * Unaligned parts of pages have to be physically copied. We use
6299 * a modified form of vm_fault_copy (which understands non-aligned
6300 * page offsets and sizes) to do the copy. We attempt to copy as
6301 * much memory in one go as possible; however, vm_fault_copy copies
6302 * within one memory object, so we have to find the smallest of
6303 * "amount left", "source object data size" and "target object data
6304 * size". With unaligned data we don't need to split regions, so the
6305 * source (copy) object should be one map entry; the target range may,
6306 * however, be split over multiple map entries. In any event we are
6307 * pessimistic about these assumptions.
6308 *
6309 * Assumptions:
6310 * dst_map is locked on entry and is returned locked on success,
6311 * unlocked on error.
6312 */
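/*
 * Rough per-iteration sketch (not part of the original source): each pass
 * of the loop below copies the largest chunk that fits within both the
 * current destination map entry and the current source copy entry,
 * clamped to what remains overall, i.e. effectively:
 *
 *     copy_size = (dst_size < src_size) ? dst_size : src_size;
 *     if (copy_size > amount_left)
 *             copy_size = amount_left;
 */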
6313
6314 static kern_return_t
6315 vm_map_copy_overwrite_unaligned(
6316 vm_map_t dst_map,
6317 vm_map_entry_t entry,
6318 vm_map_copy_t copy,
6319 vm_map_offset_t start)
6320 {
6321 vm_map_entry_t copy_entry = vm_map_copy_first_entry(copy);
6322 vm_map_version_t version;
6323 vm_object_t dst_object;
6324 vm_object_offset_t dst_offset;
6325 vm_object_offset_t src_offset;
6326 vm_object_offset_t entry_offset;
6327 vm_map_offset_t entry_end;
6328 vm_map_size_t src_size,
6329 dst_size,
6330 copy_size,
6331 amount_left;
6332 kern_return_t kr = KERN_SUCCESS;
6333
6334 vm_map_lock_write_to_read(dst_map);
6335
6336 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
6337 amount_left = copy->size;
6338 /*
6339 * The copy is unaligned, so we never clipped this entry; we need the
6340 * offset into the vm_object, not just into the data.
6341 */
6342 while (amount_left > 0) {
6343
6344 if (entry == vm_map_to_entry(dst_map)) {
6345 vm_map_unlock_read(dst_map);
6346 return KERN_INVALID_ADDRESS;
6347 }
6348
6349 /* "start" must be within the current map entry */
6350 assert ((start>=entry->vme_start) && (start<entry->vme_end));
6351
6352 dst_offset = start - entry->vme_start;
6353
6354 dst_size = entry->vme_end - start;
6355
6356 src_size = copy_entry->vme_end -
6357 (copy_entry->vme_start + src_offset);
6358
6359 if (dst_size < src_size) {
6360 /*
6361 * we can only copy dst_size bytes before
6362 * we have to get the next destination entry
6363 */
6364 copy_size = dst_size;
6365 } else {
6366 /*
6367 * we can only copy src_size bytes before
6368 * we have to get the next source copy entry
6369 */
6370 copy_size = src_size;
6371 }
6372
6373 if (copy_size > amount_left) {
6374 copy_size = amount_left;
6375 }
6376 /*
6377 * Entry needs copy: create a shadow object for the
6378 * copy-on-write region.
6379 */
6380 if (entry->needs_copy &&
6381 ((entry->protection & VM_PROT_WRITE) != 0))
6382 {
6383 if (vm_map_lock_read_to_write(dst_map)) {
6384 vm_map_lock_read(dst_map);
6385 goto RetryLookup;
6386 }
6387 vm_object_shadow(&entry->object.vm_object,
6388 &entry->offset,
6389 (vm_map_size_t)(entry->vme_end
6390 - entry->vme_start));
6391 entry->needs_copy = FALSE;
6392 vm_map_lock_write_to_read(dst_map);
6393 }
6394 dst_object = entry->object.vm_object;
6395 /*
6396 * Unlike with the virtual (aligned) copy, we're going
6397 * to fault on it, therefore we need a target object.
6398 */
6399 if (dst_object == VM_OBJECT_NULL) {
6400 if (vm_map_lock_read_to_write(dst_map)) {
6401 vm_map_lock_read(dst_map);
6402 goto RetryLookup;
6403 }
6404 dst_object = vm_object_allocate((vm_map_size_t)
6405 entry->vme_end - entry->vme_start);
6406 entry->object.vm_object = dst_object;
6407 entry->offset = 0;
6408 vm_map_lock_write_to_read(dst_map);
6409 }
6410 /*
6411 * Take an object reference and unlock map. The "entry" may
6412 * disappear or change when the map is unlocked.
6413 */
6414 vm_object_reference(dst_object);
6415 version.main_timestamp = dst_map->timestamp;
6416 entry_offset = entry->offset;
6417 entry_end = entry->vme_end;
6418 vm_map_unlock_read(dst_map);
6419 /*
6420 * Copy as much as possible in one pass
6421 */
6422 kr = vm_fault_copy(
6423 copy_entry->object.vm_object,
6424 copy_entry->offset + src_offset,
6425 &copy_size,
6426 dst_object,
6427 entry_offset + dst_offset,
6428 dst_map,
6429 &version,
6430 THREAD_UNINT );
6431
6432 start += copy_size;
6433 src_offset += copy_size;
6434 amount_left -= copy_size;
6435 /*
6436 * Release the object reference
6437 */
6438 vm_object_deallocate(dst_object);
6439 /*
6440 * If a hard error occurred, return it now
6441 */
6442 if (kr != KERN_SUCCESS)
6443 return kr;
6444
6445 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
6446 || amount_left == 0)
6447 {
6448 /*
6449 * all done with this copy entry, dispose.
6450 */
6451 vm_map_copy_entry_unlink(copy, copy_entry);
6452 vm_object_deallocate(copy_entry->object.vm_object);
6453 vm_map_copy_entry_dispose(copy, copy_entry);
6454
6455 if ((copy_entry = vm_map_copy_first_entry(copy))
6456 == vm_map_copy_to_entry(copy) && amount_left) {
6457 /*
6458 * not finished copying but ran out of source
6459 */
6460 return KERN_INVALID_ADDRESS;
6461 }
6462 src_offset = 0;
6463 }
6464
6465 if (amount_left == 0)
6466 return KERN_SUCCESS;
6467
6468 vm_map_lock_read(dst_map);
6469 if (version.main_timestamp == dst_map->timestamp) {
6470 if (start == entry_end) {
6471 /*
6472 * destination region is split. Use the version
6473 * information to avoid a lookup in the normal
6474 * case.
6475 */
6476 entry = entry->vme_next;
6477 /*
6478 * should be contiguous. Fail if we encounter
6479 * a hole in the destination.
6480 */
6481 if (start != entry->vme_start) {
6482 vm_map_unlock_read(dst_map);
6483 return KERN_INVALID_ADDRESS ;
6484 }
6485 }
6486 } else {
6487 /*
6488 * Map version check failed.
6489 * we must lookup the entry because somebody
6490 * might have changed the map behind our backs.
6491 */
6492 RetryLookup:
6493 if (!vm_map_lookup_entry(dst_map, start, &entry))
6494 {
6495 vm_map_unlock_read(dst_map);
6496 return KERN_INVALID_ADDRESS ;
6497 }
6498 }
6499 }/* while */
6500
6501 return KERN_SUCCESS;
6502 }/* vm_map_copy_overwrite_unaligned */
6503
6504 /*
6505 * Routine: vm_map_copy_overwrite_aligned [internal use only]
6506 *
6507 * Description:
6508 * Does all the vm_trickery possible for whole pages.
6509 *
6510 * Implementation:
6511 *
6512 * If there are no permanent objects in the destination,
6513 * and the source and destination map entry zones match,
6514 * and the destination map entry is not shared,
6515 * then the map entries can be deleted and replaced
6516 * with those from the copy. The following code is the
6517 * basic idea of what to do, but there are lots of annoying
6518 * little details about getting protection and inheritance
6519 * right. Should add protection, inheritance, and sharing checks
6520 * to the above pass and make sure that no wiring is involved.
6521 */
6522
6523 static kern_return_t
6524 vm_map_copy_overwrite_aligned(
6525 vm_map_t dst_map,
6526 vm_map_entry_t tmp_entry,
6527 vm_map_copy_t copy,
6528 vm_map_offset_t start,
6529 __unused pmap_t pmap)
6530 {
6531 vm_object_t object;
6532 vm_map_entry_t copy_entry;
6533 vm_map_size_t copy_size;
6534 vm_map_size_t size;
6535 vm_map_entry_t entry;
6536
6537 while ((copy_entry = vm_map_copy_first_entry(copy))
6538 != vm_map_copy_to_entry(copy))
6539 {
6540 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
6541
6542 entry = tmp_entry;
6543 assert(!entry->use_pmap); /* unnested when clipped earlier */
6544 if (entry == vm_map_to_entry(dst_map)) {
6545 vm_map_unlock(dst_map);
6546 return KERN_INVALID_ADDRESS;
6547 }
6548 size = (entry->vme_end - entry->vme_start);
6549 /*
6550 * Make sure that no holes popped up in the
6551 * address map, and that the protection is
6552 * still valid, in case the map was unlocked
6553 * earlier.
6554 */
6555
6556 if ((entry->vme_start != start) || ((entry->is_sub_map)
6557 && !entry->needs_copy)) {
6558 vm_map_unlock(dst_map);
6559 return(KERN_INVALID_ADDRESS);
6560 }
6561 assert(entry != vm_map_to_entry(dst_map));
6562
6563 /*
6564 * Check protection again
6565 */
6566
6567 if ( ! (entry->protection & VM_PROT_WRITE)) {
6568 vm_map_unlock(dst_map);
6569 return(KERN_PROTECTION_FAILURE);
6570 }
6571
6572 /*
6573 * Adjust to source size first
6574 */
6575
6576 if (copy_size < size) {
6577 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
6578 size = copy_size;
6579 }
6580
6581 /*
6582 * Adjust to destination size
6583 */
6584
6585 if (size < copy_size) {
6586 vm_map_copy_clip_end(copy, copy_entry,
6587 copy_entry->vme_start + size);
6588 copy_size = size;
6589 }
6590
6591 assert((entry->vme_end - entry->vme_start) == size);
6592 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
6593 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
6594
6595 /*
6596 * If the destination contains temporary unshared memory,
6597 * we can perform the copy by throwing it away and
6598 * installing the source data.
6599 */
6600
6601 object = entry->object.vm_object;
6602 if ((!entry->is_shared &&
6603 ((object == VM_OBJECT_NULL) ||
6604 (object->internal && !object->true_share))) ||
6605 entry->needs_copy) {
6606 vm_object_t old_object = entry->object.vm_object;
6607 vm_object_offset_t old_offset = entry->offset;
6608 vm_object_offset_t offset;
6609
6610 /*
6611 * Ensure that the source and destination aren't
6612 * identical
6613 */
6614 if (old_object == copy_entry->object.vm_object &&
6615 old_offset == copy_entry->offset) {
6616 vm_map_copy_entry_unlink(copy, copy_entry);
6617 vm_map_copy_entry_dispose(copy, copy_entry);
6618
6619 if (old_object != VM_OBJECT_NULL)
6620 vm_object_deallocate(old_object);
6621
6622 start = tmp_entry->vme_end;
6623 tmp_entry = tmp_entry->vme_next;
6624 continue;
6625 }
6626
6627 if (entry->alias >= VM_MEMORY_MALLOC &&
6628 entry->alias <= VM_MEMORY_MALLOC_LARGE_REUSED) {
6629 vm_object_t new_object, new_shadow;
6630
6631 /*
6632 * We're about to map something over a mapping
6633 * established by malloc()...
6634 */
6635 new_object = copy_entry->object.vm_object;
6636 if (new_object != VM_OBJECT_NULL) {
6637 vm_object_lock_shared(new_object);
6638 }
6639 while (new_object != VM_OBJECT_NULL &&
6640 new_object->internal) {
6641 new_shadow = new_object->shadow;
6642 if (new_shadow == VM_OBJECT_NULL) {
6643 break;
6644 }
6645 vm_object_lock_shared(new_shadow);
6646 vm_object_unlock(new_object);
6647 new_object = new_shadow;
6648 }
6649 if (new_object != VM_OBJECT_NULL) {
6650 if (!new_object->internal) {
6651 /*
6652 * The new mapping is backed
6653 * by an external object. We
6654 * don't want malloc'ed memory
6655 * to be replaced with such a
6656 * non-anonymous mapping, so
6657 * let's go off the optimized
6658 * path...
6659 */
6660 vm_object_unlock(new_object);
6661 goto slow_copy;
6662 }
6663 vm_object_unlock(new_object);
6664 }
6665 /*
6666 * The new mapping is still backed by
6667 * anonymous (internal) memory, so it's
6668 * OK to substitute it for the original
6669 * malloc() mapping.
6670 */
6671 }
6672
6673 if (old_object != VM_OBJECT_NULL) {
6674 if(entry->is_sub_map) {
6675 if(entry->use_pmap) {
6676 #ifndef NO_NESTED_PMAP
6677 pmap_unnest(dst_map->pmap,
6678 (addr64_t)entry->vme_start,
6679 entry->vme_end - entry->vme_start);
6680 #endif /* NO_NESTED_PMAP */
6681 if(dst_map->mapped) {
6682 /* clean up parent */
6683 /* map/maps */
6684 vm_map_submap_pmap_clean(
6685 dst_map, entry->vme_start,
6686 entry->vme_end,
6687 entry->object.sub_map,
6688 entry->offset);
6689 }
6690 } else {
6691 vm_map_submap_pmap_clean(
6692 dst_map, entry->vme_start,
6693 entry->vme_end,
6694 entry->object.sub_map,
6695 entry->offset);
6696 }
6697 vm_map_deallocate(
6698 entry->object.sub_map);
6699 } else {
6700 if(dst_map->mapped) {
6701 vm_object_pmap_protect(
6702 entry->object.vm_object,
6703 entry->offset,
6704 entry->vme_end
6705 - entry->vme_start,
6706 PMAP_NULL,
6707 entry->vme_start,
6708 VM_PROT_NONE);
6709 } else {
6710 pmap_remove(dst_map->pmap,
6711 (addr64_t)(entry->vme_start),
6712 (addr64_t)(entry->vme_end));
6713 }
6714 vm_object_deallocate(old_object);
6715 }
6716 }
6717
6718 entry->is_sub_map = FALSE;
6719 entry->object = copy_entry->object;
6720 object = entry->object.vm_object;
6721 entry->needs_copy = copy_entry->needs_copy;
6722 entry->wired_count = 0;
6723 entry->user_wired_count = 0;
6724 offset = entry->offset = copy_entry->offset;
6725
6726 vm_map_copy_entry_unlink(copy, copy_entry);
6727 vm_map_copy_entry_dispose(copy, copy_entry);
6728
6729 /*
6730 * We could try to push pages into the pmap at this point, BUT
6731 * this optimization only saved on average 2 us per page if ALL
6732 * the pages in the source were currently mapped and ALL the pages
6733 * in the dest were touched. If fewer than 2/3 of the pages were
6734 * touched, this optimization actually cost more cycles. It also
6735 * puts a lot of pressure on the pmap layer w.r.t. mapping structures.
6736 */
6737
6738 /*
6739 * Set up for the next iteration. The map
6740 * has not been unlocked, so the next
6741 * address should be at the end of this
6742 * entry, and the next map entry should be
6743 * the one following it.
6744 */
6745
6746 start = tmp_entry->vme_end;
6747 tmp_entry = tmp_entry->vme_next;
6748 } else {
6749 vm_map_version_t version;
6750 vm_object_t dst_object;
6751 vm_object_offset_t dst_offset;
6752 kern_return_t r;
6753
6754 slow_copy:
6755 dst_object = entry->object.vm_object;
6756 dst_offset = entry->offset;
6757
6758 /*
6759 * Take an object reference, and record
6760 * the map version information so that the
6761 * map can be safely unlocked.
6762 */
6763
6764 if (dst_object == VM_OBJECT_NULL) {
6765 /*
6766 * We would usually have just taken the
6767 * optimized path above if the destination
6768 * object has not been allocated yet. But we
6769 * now disable that optimization if the copy
6770 * entry's object is not backed by anonymous
6771 * memory to avoid replacing malloc'ed
6772 * (i.e. re-usable) anonymous memory with a
6773 * not-so-anonymous mapping.
6774 * So we have to handle this case here and
6775 * allocate a new VM object for this map entry.
6776 */
6777 dst_object = vm_object_allocate(
6778 entry->vme_end - entry->vme_start);
6779 dst_offset = 0;
6780 entry->object.vm_object = dst_object;
6781 entry->offset = dst_offset;
6782
6783 }
6784
6785 vm_object_reference(dst_object);
6786
6787 /* account for unlock bumping up timestamp */
6788 version.main_timestamp = dst_map->timestamp + 1;
6789
6790 vm_map_unlock(dst_map);
6791
6792 /*
6793 * Copy as much as possible in one pass
6794 */
6795
6796 copy_size = size;
6797 r = vm_fault_copy(
6798 copy_entry->object.vm_object,
6799 copy_entry->offset,
6800 &copy_size,
6801 dst_object,
6802 dst_offset,
6803 dst_map,
6804 &version,
6805 THREAD_UNINT );
6806
6807 /*
6808 * Release the object reference
6809 */
6810
6811 vm_object_deallocate(dst_object);
6812
6813 /*
6814 * If a hard error occurred, return it now
6815 */
6816
6817 if (r != KERN_SUCCESS)
6818 return(r);
6819
6820 if (copy_size != 0) {
6821 /*
6822 * Dispose of the copied region
6823 */
6824
6825 vm_map_copy_clip_end(copy, copy_entry,
6826 copy_entry->vme_start + copy_size);
6827 vm_map_copy_entry_unlink(copy, copy_entry);
6828 vm_object_deallocate(copy_entry->object.vm_object);
6829 vm_map_copy_entry_dispose(copy, copy_entry);
6830 }
6831
6832 /*
6833 * Pick up in the destination map where we left off.
6834 *
6835 * Use the version information to avoid a lookup
6836 * in the normal case.
6837 */
6838
6839 start += copy_size;
6840 vm_map_lock(dst_map);
6841 if (version.main_timestamp == dst_map->timestamp) {
6842 /* We can safely use saved tmp_entry value */
6843
6844 vm_map_clip_end(dst_map, tmp_entry, start);
6845 tmp_entry = tmp_entry->vme_next;
6846 } else {
6847 /* Must do lookup of tmp_entry */
6848
6849 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
6850 vm_map_unlock(dst_map);
6851 return(KERN_INVALID_ADDRESS);
6852 }
6853 vm_map_clip_start(dst_map, tmp_entry, start);
6854 }
6855 }
6856 }/* while */
6857
6858 return(KERN_SUCCESS);
6859 }/* vm_map_copy_overwrite_aligned */
6860
6861 /*
6862 * Routine: vm_map_copyin_kernel_buffer [internal use only]
6863 *
6864 * Description:
6865 * Copy in data to a kernel buffer from space in the
6866 * source map. The original space may be optionally
6867 * deallocated.
6868 *
6869 * If successful, returns a new copy object.
6870 */
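/*
 * Buffer layout sketch (not part of the original source): the kernel-buffer
 * copy is a single kalloc'ed block with the payload stored immediately
 * after the vm_map_copy header, which is why cpy_kdata is set to
 * (copy + 1):
 *
 *     +--------------------+------------------------+
 *     | struct vm_map_copy | len bytes of user data |
 *     +--------------------+------------------------+
 *     ^copy                ^copy->cpy_kdata
 */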
6871 static kern_return_t
6872 vm_map_copyin_kernel_buffer(
6873 vm_map_t src_map,
6874 vm_map_offset_t src_addr,
6875 vm_map_size_t len,
6876 boolean_t src_destroy,
6877 vm_map_copy_t *copy_result)
6878 {
6879 kern_return_t kr;
6880 vm_map_copy_t copy;
6881 vm_size_t kalloc_size;
6882
6883 if ((vm_size_t) len != len) {
6884 /* "len" is too big and doesn't fit in a "vm_size_t" */
6885 return KERN_RESOURCE_SHORTAGE;
6886 }
6887 kalloc_size = (vm_size_t) (sizeof(struct vm_map_copy) + len);
6888 assert((vm_map_size_t) kalloc_size == sizeof (struct vm_map_copy) + len);
6889
6890 copy = (vm_map_copy_t) kalloc(kalloc_size);
6891 if (copy == VM_MAP_COPY_NULL) {
6892 return KERN_RESOURCE_SHORTAGE;
6893 }
6894 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
6895 copy->size = len;
6896 copy->offset = 0;
6897 copy->cpy_kdata = (void *) (copy + 1);
6898 copy->cpy_kalloc_size = kalloc_size;
6899
6900 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t) len);
6901 if (kr != KERN_SUCCESS) {
6902 kfree(copy, kalloc_size);
6903 return kr;
6904 }
6905 if (src_destroy) {
6906 (void) vm_map_remove(src_map, vm_map_trunc_page(src_addr),
6907 vm_map_round_page(src_addr + len),
6908 VM_MAP_REMOVE_INTERRUPTIBLE |
6909 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
6910 (src_map == kernel_map) ?
6911 VM_MAP_REMOVE_KUNWIRE : 0);
6912 }
6913 *copy_result = copy;
6914 return KERN_SUCCESS;
6915 }
6916
6917 /*
6918 * Routine: vm_map_copyout_kernel_buffer [internal use only]
6919 *
6920 * Description:
6921 * Copy out data from a kernel buffer into space in the
6922 * destination map. The space may be optionally dynamically
6923 * allocated.
6924 *
6925 * If successful, consumes the copy object.
6926 * Otherwise, the caller is responsible for it.
6927 */
6928 static int vm_map_copyout_kernel_buffer_failures = 0;
6929 static kern_return_t
6930 vm_map_copyout_kernel_buffer(
6931 vm_map_t map,
6932 vm_map_address_t *addr, /* IN/OUT */
6933 vm_map_copy_t copy,
6934 boolean_t overwrite)
6935 {
6936 kern_return_t kr = KERN_SUCCESS;
6937 thread_t thread = current_thread();
6938
6939 if (!overwrite) {
6940
6941 /*
6942 * Allocate space in the target map for the data
6943 */
6944 *addr = 0;
6945 kr = vm_map_enter(map,
6946 addr,
6947 vm_map_round_page(copy->size),
6948 (vm_map_offset_t) 0,
6949 VM_FLAGS_ANYWHERE,
6950 VM_OBJECT_NULL,
6951 (vm_object_offset_t) 0,
6952 FALSE,
6953 VM_PROT_DEFAULT,
6954 VM_PROT_ALL,
6955 VM_INHERIT_DEFAULT);
6956 if (kr != KERN_SUCCESS)
6957 return kr;
6958 }
6959
6960 /*
6961 * Copyout the data from the kernel buffer to the target map.
6962 */
6963 if (thread->map == map) {
6964
6965 /*
6966 * If the target map is the current map, just do
6967 * the copy.
6968 */
6969 assert((vm_size_t) copy->size == copy->size);
6970 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
6971 kr = KERN_INVALID_ADDRESS;
6972 }
6973 }
6974 else {
6975 vm_map_t oldmap;
6976
6977 /*
6978 * If the target map is another map, assume the
6979 * target's address space identity for the duration
6980 * of the copy.
6981 */
6982 vm_map_reference(map);
6983 oldmap = vm_map_switch(map);
6984
6985 assert((vm_size_t) copy->size == copy->size);
6986 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
6987 vm_map_copyout_kernel_buffer_failures++;
6988 kr = KERN_INVALID_ADDRESS;
6989 }
6990
6991 (void) vm_map_switch(oldmap);
6992 vm_map_deallocate(map);
6993 }
6994
6995 if (kr != KERN_SUCCESS) {
6996 /* the copy failed, clean up */
6997 if (!overwrite) {
6998 /*
6999 * Deallocate the space we allocated in the target map.
7000 */
7001 (void) vm_map_remove(map,
7002 vm_map_trunc_page(*addr),
7003 vm_map_round_page(*addr +
7004 vm_map_round_page(copy->size)),
7005 VM_MAP_NO_FLAGS);
7006 *addr = 0;
7007 }
7008 } else {
7009 /* copy was successful, discard the copy structure */
7010 kfree(copy, copy->cpy_kalloc_size);
7011 }
7012
7013 return kr;
7014 }
7015
7016 /*
7017 * Macro: vm_map_copy_insert
7018 *
7019 * Description:
7020 * Link a copy chain ("copy") into a map at the
7021 * specified location (after "where").
7022 * Side effects:
7023 * The copy chain is destroyed.
7024 * Warning:
7025 * The arguments are evaluated multiple times.
7026 */
7027 #define vm_map_copy_insert(map, where, copy) \
7028 MACRO_BEGIN \
7029 vm_map_store_copy_insert(map, where, copy); \
7030 zfree(vm_map_copy_zone, copy); \
7031 MACRO_END
7032
7033 /*
7034 * Routine: vm_map_copyout
7035 *
7036 * Description:
7037 * Copy out a copy chain ("copy") into newly-allocated
7038 * space in the destination map.
7039 *
7040 * If successful, consumes the copy object.
7041 * Otherwise, the caller is responsible for it.
7042 */
7043 kern_return_t
7044 vm_map_copyout(
7045 vm_map_t dst_map,
7046 vm_map_address_t *dst_addr, /* OUT */
7047 vm_map_copy_t copy)
7048 {
7049 vm_map_size_t size;
7050 vm_map_size_t adjustment;
7051 vm_map_offset_t start;
7052 vm_object_offset_t vm_copy_start;
7053 vm_map_entry_t last;
7054 register
7055 vm_map_entry_t entry;
7056
7057 /*
7058 * Check for null copy object.
7059 */
7060
7061 if (copy == VM_MAP_COPY_NULL) {
7062 *dst_addr = 0;
7063 return(KERN_SUCCESS);
7064 }
7065
7066 /*
7067 * Check for special copy object, created
7068 * by vm_map_copyin_object.
7069 */
7070
7071 if (copy->type == VM_MAP_COPY_OBJECT) {
7072 vm_object_t object = copy->cpy_object;
7073 kern_return_t kr;
7074 vm_object_offset_t offset;
7075
7076 offset = vm_object_trunc_page(copy->offset);
7077 size = vm_map_round_page(copy->size +
7078 (vm_map_size_t)(copy->offset - offset));
7079 *dst_addr = 0;
7080 kr = vm_map_enter(dst_map, dst_addr, size,
7081 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
7082 object, offset, FALSE,
7083 VM_PROT_DEFAULT, VM_PROT_ALL,
7084 VM_INHERIT_DEFAULT);
7085 if (kr != KERN_SUCCESS)
7086 return(kr);
7087 /* Account for non-pagealigned copy object */
7088 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
7089 zfree(vm_map_copy_zone, copy);
7090 return(KERN_SUCCESS);
7091 }
7092
7093 /*
7094 * Check for special kernel buffer allocated
7095 * by new_ipc_kmsg_copyin.
7096 */
7097
7098 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
7099 return(vm_map_copyout_kernel_buffer(dst_map, dst_addr,
7100 copy, FALSE));
7101 }
7102
7103 /*
7104 * Find space for the data
7105 */
7106
7107 vm_copy_start = vm_object_trunc_page(copy->offset);
7108 size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size)
7109 - vm_copy_start;
7110
7111 StartAgain: ;
7112
7113 vm_map_lock(dst_map);
7114 if( dst_map->disable_vmentry_reuse == TRUE) {
7115 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
7116 last = entry;
7117 } else {
7118 assert(first_free_is_valid(dst_map));
7119 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
7120 vm_map_min(dst_map) : last->vme_end;
7121 }
7122
7123 while (TRUE) {
7124 vm_map_entry_t next = last->vme_next;
7125 vm_map_offset_t end = start + size;
7126
7127 if ((end > dst_map->max_offset) || (end < start)) {
7128 if (dst_map->wait_for_space) {
7129 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
7130 assert_wait((event_t) dst_map,
7131 THREAD_INTERRUPTIBLE);
7132 vm_map_unlock(dst_map);
7133 thread_block(THREAD_CONTINUE_NULL);
7134 goto StartAgain;
7135 }
7136 }
7137 vm_map_unlock(dst_map);
7138 return(KERN_NO_SPACE);
7139 }
7140
7141 if ((next == vm_map_to_entry(dst_map)) ||
7142 (next->vme_start >= end))
7143 break;
7144
7145 last = next;
7146 start = last->vme_end;
7147 }
7148
7149 /*
7150 * Since we're going to just drop the map
7151 * entries from the copy into the destination
7152 * map, they must come from the same pool.
7153 */
7154
7155 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
7156 /*
7157 * Mismatches occur when dealing with the default
7158 * pager.
7159 */
7160 zone_t old_zone;
7161 vm_map_entry_t next, new;
7162
7163 /*
7164 * Find the zone that the copies were allocated from
7165 */
7166 old_zone = (copy->cpy_hdr.entries_pageable)
7167 ? vm_map_entry_zone
7168 : vm_map_kentry_zone;
7169 entry = vm_map_copy_first_entry(copy);
7170
7171 /*
7172 * Reinitialize the copy so that vm_map_copy_entry_link
7173 * will work.
7174 */
7175 vm_map_store_copy_reset(copy, entry);
7176 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
7177
7178 /*
7179 * Copy each entry.
7180 */
7181 while (entry != vm_map_copy_to_entry(copy)) {
7182 new = vm_map_copy_entry_create(copy);
7183 vm_map_entry_copy_full(new, entry);
7184 new->use_pmap = FALSE; /* clr address space specifics */
7185 vm_map_copy_entry_link(copy,
7186 vm_map_copy_last_entry(copy),
7187 new);
7188 next = entry->vme_next;
7189 zfree(old_zone, entry);
7190 entry = next;
7191 }
7192 }
7193
7194 /*
7195 * Adjust the addresses in the copy chain, and
7196 * reset the region attributes.
7197 */
7198
7199 adjustment = start - vm_copy_start;
7200 for (entry = vm_map_copy_first_entry(copy);
7201 entry != vm_map_copy_to_entry(copy);
7202 entry = entry->vme_next) {
7203 entry->vme_start += adjustment;
7204 entry->vme_end += adjustment;
7205
7206 entry->inheritance = VM_INHERIT_DEFAULT;
7207 entry->protection = VM_PROT_DEFAULT;
7208 entry->max_protection = VM_PROT_ALL;
7209 entry->behavior = VM_BEHAVIOR_DEFAULT;
7210
7211 /*
7212 * If the entry is now wired,
7213 * map the pages into the destination map.
7214 */
7215 if (entry->wired_count != 0) {
7216 register vm_map_offset_t va;
7217 vm_object_offset_t offset;
7218 register vm_object_t object;
7219 vm_prot_t prot;
7220 int type_of_fault;
7221
7222 object = entry->object.vm_object;
7223 offset = entry->offset;
7224 va = entry->vme_start;
7225
7226 pmap_pageable(dst_map->pmap,
7227 entry->vme_start,
7228 entry->vme_end,
7229 TRUE);
7230
7231 while (va < entry->vme_end) {
7232 register vm_page_t m;
7233
7234 /*
7235 * Look up the page in the object.
7236 * Assert that the page will be found in the
7237 * top object:
7238 * either
7239 * the object was newly created by
7240 * vm_object_copy_slowly, and has
7241 * copies of all of the pages from
7242 * the source object
7243 * or
7244 * the object was moved from the old
7245 * map entry; because the old map
7246 * entry was wired, all of the pages
7247 * were in the top-level object.
7248 * (XXX not true if we wire pages for
7249 * reading)
7250 */
7251 vm_object_lock(object);
7252
7253 m = vm_page_lookup(object, offset);
7254 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
7255 m->absent)
7256 panic("vm_map_copyout: wiring %p", m);
7257
7258 /*
7259 * ENCRYPTED SWAP:
7260 * The page is assumed to be wired here, so it
7261 * shouldn't be encrypted. Otherwise, we
7262 * couldn't enter it in the page table, since
7263 * we don't want the user to see the encrypted
7264 * data.
7265 */
7266 ASSERT_PAGE_DECRYPTED(m);
7267
7268 prot = entry->protection;
7269
7270 if (override_nx(dst_map, entry->alias) && prot)
7271 prot |= VM_PROT_EXECUTE;
7272
7273 type_of_fault = DBG_CACHE_HIT_FAULT;
7274
7275 vm_fault_enter(m, dst_map->pmap, va, prot, prot,
7276 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
7277 &type_of_fault);
7278
7279 vm_object_unlock(object);
7280
7281 offset += PAGE_SIZE_64;
7282 va += PAGE_SIZE;
7283 }
7284 }
7285 }
7286
7287 /*
7288 * Correct the page alignment for the result
7289 */
7290
7291 *dst_addr = start + (copy->offset - vm_copy_start);
7292
7293 /*
7294 * Update the hints and the map size
7295 */
7296
7297 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
7298
7299 dst_map->size += size;
7300
7301 /*
7302 * Link in the copy
7303 */
7304
7305 vm_map_copy_insert(dst_map, last, copy);
7306
7307 vm_map_unlock(dst_map);
7308
7309 /*
7310 * XXX If wiring_required, call vm_map_pageable
7311 */
7312
7313 return(KERN_SUCCESS);
7314 }
7315
7316 /*
7317 * Routine: vm_map_copyin
7318 *
7319 * Description:
7320 * see vm_map_copyin_common. Exported via Unsupported.exports.
7321 *
7322 */
7323
7324 #undef vm_map_copyin
7325
7326 kern_return_t
7327 vm_map_copyin(
7328 vm_map_t src_map,
7329 vm_map_address_t src_addr,
7330 vm_map_size_t len,
7331 boolean_t src_destroy,
7332 vm_map_copy_t *copy_result) /* OUT */
7333 {
7334 return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
7335 FALSE, copy_result, FALSE));
7336 }
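/*
 * Illustrative caller sketch (not part of the original source): moving a
 * region into newly allocated space in another map. vm_map_copyout() only
 * consumes the copy object on success, so the caller discards it on
 * failure:
 *
 *     vm_map_copy_t           copy;
 *     vm_map_address_t        dst_addr;
 *     kern_return_t           kr;
 *
 *     kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
 *     if (kr == KERN_SUCCESS) {
 *             kr = vm_map_copyout(dst_map, &dst_addr, copy);
 *             if (kr != KERN_SUCCESS)
 *                     vm_map_copy_discard(copy);
 *     }
 */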
7337
7338 /*
7339 * Routine: vm_map_copyin_common
7340 *
7341 * Description:
7342 * Copy the specified region (src_addr, len) from the
7343 * source address space (src_map), possibly removing
7344 * the region from the source address space (src_destroy).
7345 *
7346 * Returns:
7347 * A vm_map_copy_t object (copy_result), suitable for
7348 * insertion into another address space (using vm_map_copyout),
7349 * copying over another address space region (using
7350 * vm_map_copy_overwrite). If the copy is unused, it
7351 * should be destroyed (using vm_map_copy_discard).
7352 *
7353 * In/out conditions:
7354 * The source map should not be locked on entry.
7355 */
7356
7357 typedef struct submap_map {
7358 vm_map_t parent_map;
7359 vm_map_offset_t base_start;
7360 vm_map_offset_t base_end;
7361 vm_map_size_t base_len;
7362 struct submap_map *next;
7363 } submap_map_t;
7364
7365 kern_return_t
7366 vm_map_copyin_common(
7367 vm_map_t src_map,
7368 vm_map_address_t src_addr,
7369 vm_map_size_t len,
7370 boolean_t src_destroy,
7371 __unused boolean_t src_volatile,
7372 vm_map_copy_t *copy_result, /* OUT */
7373 boolean_t use_maxprot)
7374 {
7375 vm_map_entry_t tmp_entry; /* Result of last map lookup --
7376 * in multi-level lookup, this
7377 * entry contains the actual
7378 * vm_object/offset.
7379 */
7380 register
7381 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
7382
7383 vm_map_offset_t src_start; /* Start of current entry --
7384 * where copy is taking place now
7385 */
7386 vm_map_offset_t src_end; /* End of entire region to be
7387 * copied */
7388 vm_map_offset_t src_base;
7389 vm_map_t base_map = src_map;
7390 boolean_t map_share=FALSE;
7391 submap_map_t *parent_maps = NULL;
7392
7393 register
7394 vm_map_copy_t copy; /* Resulting copy */
7395 vm_map_address_t copy_addr;
7396
7397 /*
7398 * Check for copies of zero bytes.
7399 */
7400
7401 if (len == 0) {
7402 *copy_result = VM_MAP_COPY_NULL;
7403 return(KERN_SUCCESS);
7404 }
7405
7406 /*
7407 * Check that the end address doesn't overflow
7408 */
7409 src_end = src_addr + len;
7410 if (src_end < src_addr)
7411 return KERN_INVALID_ADDRESS;
7412
7413 /*
7414 * If the copy is sufficiently small, use a kernel buffer instead
7415 * of making a virtual copy. The theory being that the cost of
7416 * setting up VM (and taking C-O-W faults) dominates the copy costs
7417 * for small regions.
7418 */
7419 if ((len < msg_ool_size_small) && !use_maxprot)
7420 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
7421 src_destroy, copy_result);
7422
7423 /*
7424 * Compute (page aligned) start and end of region
7425 */
7426 src_start = vm_map_trunc_page(src_addr);
7427 src_end = vm_map_round_page(src_end);
7428
7429 XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);
7430
7431 /*
7432 * Allocate a header element for the list.
7433 *
7434 * Use the start and end in the header to
7435 * remember the endpoints prior to rounding.
7436 */
7437
7438 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7439 vm_map_copy_first_entry(copy) =
7440 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
7441 copy->type = VM_MAP_COPY_ENTRY_LIST;
7442 copy->cpy_hdr.nentries = 0;
7443 copy->cpy_hdr.entries_pageable = TRUE;
7444
7445 vm_map_store_init( &(copy->cpy_hdr) );
7446
7447 copy->offset = src_addr;
7448 copy->size = len;
7449
7450 new_entry = vm_map_copy_entry_create(copy);
7451
7452 #define RETURN(x) \
7453 MACRO_BEGIN \
7454 vm_map_unlock(src_map); \
7455 if(src_map != base_map) \
7456 vm_map_deallocate(src_map); \
7457 if (new_entry != VM_MAP_ENTRY_NULL) \
7458 vm_map_copy_entry_dispose(copy,new_entry); \
7459 vm_map_copy_discard(copy); \
7460 { \
7461 submap_map_t *_ptr; \
7462 \
7463 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
7464 parent_maps=parent_maps->next; \
7465 if (_ptr->parent_map != base_map) \
7466 vm_map_deallocate(_ptr->parent_map); \
7467 kfree(_ptr, sizeof(submap_map_t)); \
7468 } \
7469 } \
7470 MACRO_RETURN(x); \
7471 MACRO_END
7472
7473 /*
7474 * Find the beginning of the region.
7475 */
7476
7477 vm_map_lock(src_map);
7478
7479 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry))
7480 RETURN(KERN_INVALID_ADDRESS);
7481 if(!tmp_entry->is_sub_map) {
7482 vm_map_clip_start(src_map, tmp_entry, src_start);
7483 }
7484 /* set for later submap fix-up */
7485 copy_addr = src_start;
7486
7487 /*
7488 * Go through entries until we get to the end.
7489 */
7490
7491 while (TRUE) {
7492 register
7493 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
7494 vm_map_size_t src_size; /* Size of source
7495 * map entry (in both
7496 * maps)
7497 */
7498
7499 register
7500 vm_object_t src_object; /* Object to copy */
7501 vm_object_offset_t src_offset;
7502
7503 boolean_t src_needs_copy; /* Should source map
7504 * be made read-only
7505 * for copy-on-write?
7506 */
7507
7508 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
7509
7510 boolean_t was_wired; /* Was source wired? */
7511 vm_map_version_t version; /* Version before locks
7512 * dropped to make copy
7513 */
7514 kern_return_t result; /* Return value from
7515 * copy_strategically.
7516 */
7517 while(tmp_entry->is_sub_map) {
7518 vm_map_size_t submap_len;
7519 submap_map_t *ptr;
7520
7521 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
7522 ptr->next = parent_maps;
7523 parent_maps = ptr;
7524 ptr->parent_map = src_map;
7525 ptr->base_start = src_start;
7526 ptr->base_end = src_end;
7527 submap_len = tmp_entry->vme_end - src_start;
7528 if(submap_len > (src_end-src_start))
7529 submap_len = src_end-src_start;
7530 ptr->base_len = submap_len;
7531
7532 src_start -= tmp_entry->vme_start;
7533 src_start += tmp_entry->offset;
7534 src_end = src_start + submap_len;
7535 src_map = tmp_entry->object.sub_map;
7536 vm_map_lock(src_map);
7537 /* keep an outstanding reference for all maps in */
7538 /* the tree of parent maps, except the base map */
7539 vm_map_reference(src_map);
7540 vm_map_unlock(ptr->parent_map);
7541 if (!vm_map_lookup_entry(
7542 src_map, src_start, &tmp_entry))
7543 RETURN(KERN_INVALID_ADDRESS);
7544 map_share = TRUE;
7545 if(!tmp_entry->is_sub_map)
7546 vm_map_clip_start(src_map, tmp_entry, src_start);
7547 src_entry = tmp_entry;
7548 }
7549 /* we are now in the lowest level submap... */
7550
7551 if ((tmp_entry->object.vm_object != VM_OBJECT_NULL) &&
7552 (tmp_entry->object.vm_object->phys_contiguous)) {
7553 /* This is not supported for now. In the future */
7554 /* we will need to detect the phys_contig */
7555 /* condition and then upgrade copy_slowly */
7556 /* to do a physical copy from the device-memory- */
7557 /* based object. We can piggy-back off of */
7558 /* the was_wired boolean to set up the */
7559 /* proper handling. */
7560 RETURN(KERN_PROTECTION_FAILURE);
7561 }
7562 /*
7563 * Create a new address map entry to hold the result.
7564 * Fill in the fields from the appropriate source entries.
7565 * We must unlock the source map to do this if we need
7566 * to allocate a map entry.
7567 */
7568 if (new_entry == VM_MAP_ENTRY_NULL) {
7569 version.main_timestamp = src_map->timestamp;
7570 vm_map_unlock(src_map);
7571
7572 new_entry = vm_map_copy_entry_create(copy);
7573
7574 vm_map_lock(src_map);
7575 if ((version.main_timestamp + 1) != src_map->timestamp) {
7576 if (!vm_map_lookup_entry(src_map, src_start,
7577 &tmp_entry)) {
7578 RETURN(KERN_INVALID_ADDRESS);
7579 }
7580 if (!tmp_entry->is_sub_map)
7581 vm_map_clip_start(src_map, tmp_entry, src_start);
7582 continue; /* restart w/ new tmp_entry */
7583 }
7584 }
7585
7586 /*
7587 * Verify that the region can be read.
7588 */
7589 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
7590 !use_maxprot) ||
7591 (src_entry->max_protection & VM_PROT_READ) == 0)
7592 RETURN(KERN_PROTECTION_FAILURE);
7593
7594 /*
7595 * Clip against the endpoints of the entire region.
7596 */
7597
7598 vm_map_clip_end(src_map, src_entry, src_end);
7599
7600 src_size = src_entry->vme_end - src_start;
7601 src_object = src_entry->object.vm_object;
7602 src_offset = src_entry->offset;
7603 was_wired = (src_entry->wired_count != 0);
7604
7605 vm_map_entry_copy(new_entry, src_entry);
7606 new_entry->use_pmap = FALSE; /* clr address space specifics */
7607
7608 /*
7609 * Attempt non-blocking copy-on-write optimizations.
7610 */
7611
7612 if (src_destroy &&
7613 (src_object == VM_OBJECT_NULL ||
7614 (src_object->internal && !src_object->true_share
7615 && !map_share))) {
7616 /*
7617 * If we are destroying the source, and the object
7618 * is internal, we can move the object reference
7619 * from the source to the copy. The copy is
7620 * copy-on-write only if the source is.
7621 * We make another reference to the object, because
7622 * destroying the source entry will deallocate it.
7623 */
7624 vm_object_reference(src_object);
7625
7626 /*
7627 * Copy is always unwired. vm_map_copy_entry
7628 * set its wired count to zero.
7629 */
7630
7631 goto CopySuccessful;
7632 }
7633
7634
7635 RestartCopy:
7636 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
7637 src_object, new_entry, new_entry->object.vm_object,
7638 was_wired, 0);
7639 if ((src_object == VM_OBJECT_NULL ||
7640 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
7641 vm_object_copy_quickly(
7642 &new_entry->object.vm_object,
7643 src_offset,
7644 src_size,
7645 &src_needs_copy,
7646 &new_entry_needs_copy)) {
7647
7648 new_entry->needs_copy = new_entry_needs_copy;
7649
7650 /*
7651 * Handle copy-on-write obligations
7652 */
7653
7654 if (src_needs_copy && !tmp_entry->needs_copy) {
7655 vm_prot_t prot;
7656
7657 prot = src_entry->protection & ~VM_PROT_WRITE;
7658
7659 if (override_nx(src_map, src_entry->alias) && prot)
7660 prot |= VM_PROT_EXECUTE;
7661
7662 vm_object_pmap_protect(
7663 src_object,
7664 src_offset,
7665 src_size,
7666 (src_entry->is_shared ?
7667 PMAP_NULL
7668 : src_map->pmap),
7669 src_entry->vme_start,
7670 prot);
7671
7672 tmp_entry->needs_copy = TRUE;
7673 }
7674
7675 /*
7676 * The map has never been unlocked, so it's safe
7677 * to move to the next entry rather than doing
7678 * another lookup.
7679 */
7680
7681 goto CopySuccessful;
7682 }
7683
7684 /*
7685 * Take an object reference, so that we may
7686 * release the map lock(s).
7687 */
7688
7689 assert(src_object != VM_OBJECT_NULL);
7690 vm_object_reference(src_object);
7691
7692 /*
7693 * Record the timestamp for later verification.
7694 * Unlock the map.
7695 */
7696
7697 version.main_timestamp = src_map->timestamp;
7698 vm_map_unlock(src_map); /* Increments timestamp once! */
7699
7700 /*
7701 * Perform the copy
7702 */
7703
7704 if (was_wired) {
7705 CopySlowly:
7706 vm_object_lock(src_object);
7707 result = vm_object_copy_slowly(
7708 src_object,
7709 src_offset,
7710 src_size,
7711 THREAD_UNINT,
7712 &new_entry->object.vm_object);
7713 new_entry->offset = 0;
7714 new_entry->needs_copy = FALSE;
7715
7716 }
7717 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
7718 (tmp_entry->is_shared || map_share)) {
7719 vm_object_t new_object;
7720
7721 vm_object_lock_shared(src_object);
7722 new_object = vm_object_copy_delayed(
7723 src_object,
7724 src_offset,
7725 src_size,
7726 TRUE);
7727 if (new_object == VM_OBJECT_NULL)
7728 goto CopySlowly;
7729
7730 new_entry->object.vm_object = new_object;
7731 new_entry->needs_copy = TRUE;
7732 result = KERN_SUCCESS;
7733
7734 } else {
7735 result = vm_object_copy_strategically(src_object,
7736 src_offset,
7737 src_size,
7738 &new_entry->object.vm_object,
7739 &new_entry->offset,
7740 &new_entry_needs_copy);
7741
7742 new_entry->needs_copy = new_entry_needs_copy;
7743 }
7744
7745 if (result != KERN_SUCCESS &&
7746 result != KERN_MEMORY_RESTART_COPY) {
7747 vm_map_lock(src_map);
7748 RETURN(result);
7749 }
7750
7751 /*
7752 * Throw away the extra reference
7753 */
7754
7755 vm_object_deallocate(src_object);
7756
7757 /*
7758 * Verify that the map has not substantially
7759 * changed while the copy was being made.
7760 */
7761
7762 vm_map_lock(src_map);
7763
7764 if ((version.main_timestamp + 1) == src_map->timestamp)
7765 goto VerificationSuccessful;
7766
7767 /*
7768 * Simple version comparison failed.
7769 *
7770 * Retry the lookup and verify that the
7771 * same object/offset are still present.
7772 *
7773 * [Note: a memory manager that colludes with
7774 * the calling task can detect that we have
7775 * cheated. While the map was unlocked, the
7776 * mapping could have been changed and restored.]
7777 */
7778
7779 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
7780 RETURN(KERN_INVALID_ADDRESS);
7781 }
7782
7783 src_entry = tmp_entry;
7784 vm_map_clip_start(src_map, src_entry, src_start);
7785
7786 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
7787 !use_maxprot) ||
7788 ((src_entry->max_protection & VM_PROT_READ) == 0))
7789 goto VerificationFailed;
7790
7791 if (src_entry->vme_end < new_entry->vme_end)
7792 src_size = (new_entry->vme_end = src_entry->vme_end) - src_start;
7793
7794 if ((src_entry->object.vm_object != src_object) ||
7795 (src_entry->offset != src_offset) ) {
7796
7797 /*
7798 * Verification failed.
7799 *
7800 * Start over with this top-level entry.
7801 */
7802
7803 VerificationFailed: ;
7804
7805 vm_object_deallocate(new_entry->object.vm_object);
7806 tmp_entry = src_entry;
7807 continue;
7808 }
7809
7810 /*
7811 * Verification succeeded.
7812 */
7813
7814 VerificationSuccessful: ;
7815
7816 if (result == KERN_MEMORY_RESTART_COPY)
7817 goto RestartCopy;
7818
7819 /*
7820 * Copy succeeded.
7821 */
7822
7823 CopySuccessful: ;
7824
7825 /*
7826 * Link in the new copy entry.
7827 */
7828
7829 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
7830 new_entry);
7831
7832 /*
7833 * Determine whether the entire region
7834 * has been copied.
7835 */
7836 src_base = src_start;
7837 src_start = new_entry->vme_end;
7838 new_entry = VM_MAP_ENTRY_NULL;
7839 while ((src_start >= src_end) && (src_end != 0)) {
7840 if (src_map != base_map) {
7841 submap_map_t *ptr;
7842
7843 ptr = parent_maps;
7844 assert(ptr != NULL);
7845 parent_maps = parent_maps->next;
7846
7847 /* fix up the damage we did in that submap */
7848 vm_map_simplify_range(src_map,
7849 src_base,
7850 src_end);
7851
7852 vm_map_unlock(src_map);
7853 vm_map_deallocate(src_map);
7854 vm_map_lock(ptr->parent_map);
7855 src_map = ptr->parent_map;
7856 src_base = ptr->base_start;
7857 src_start = ptr->base_start + ptr->base_len;
7858 src_end = ptr->base_end;
7859 if ((src_end > src_start) &&
7860 !vm_map_lookup_entry(
7861 src_map, src_start, &tmp_entry))
7862 RETURN(KERN_INVALID_ADDRESS);
7863 kfree(ptr, sizeof(submap_map_t));
7864 if(parent_maps == NULL)
7865 map_share = FALSE;
7866 src_entry = tmp_entry->vme_prev;
7867 } else
7868 break;
7869 }
7870 if ((src_start >= src_end) && (src_end != 0))
7871 break;
7872
7873 /*
7874 * Verify that there are no gaps in the region
7875 */
7876
7877 tmp_entry = src_entry->vme_next;
7878 if ((tmp_entry->vme_start != src_start) ||
7879 (tmp_entry == vm_map_to_entry(src_map)))
7880 RETURN(KERN_INVALID_ADDRESS);
7881 }
7882
7883 /*
7884 * If the source should be destroyed, do it now, since the
7885 * copy was successful.
7886 */
7887 if (src_destroy) {
7888 (void) vm_map_delete(src_map,
7889 vm_map_trunc_page(src_addr),
7890 src_end,
7891 (src_map == kernel_map) ?
7892 VM_MAP_REMOVE_KUNWIRE :
7893 VM_MAP_NO_FLAGS,
7894 VM_MAP_NULL);
7895 } else {
7896 /* fix up the damage we did in the base map */
7897 vm_map_simplify_range(src_map,
7898 vm_map_trunc_page(src_addr),
7899 vm_map_round_page(src_end));
7900 }
7901
7902 vm_map_unlock(src_map);
7903
7904 /* Fix-up start and end points in copy. This is necessary */
7905 /* when the various entries in the copy object were picked */
7906 /* up from different sub-maps */
7907
7908 tmp_entry = vm_map_copy_first_entry(copy);
7909 while (tmp_entry != vm_map_copy_to_entry(copy)) {
7910 tmp_entry->vme_end = copy_addr +
7911 (tmp_entry->vme_end - tmp_entry->vme_start);
7912 tmp_entry->vme_start = copy_addr;
7913 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
7914 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
7915 }
7916
7917 *copy_result = copy;
7918 return(KERN_SUCCESS);
7919
7920 #undef RETURN
7921 }
7922
7923 /*
7924 * vm_map_copyin_object:
7925 *
7926 * Create a copy object from an object.
7927 * Our caller donates an object reference.
7928 */
7929
7930 kern_return_t
7931 vm_map_copyin_object(
7932 vm_object_t object,
7933 vm_object_offset_t offset, /* offset of region in object */
7934 vm_object_size_t size, /* size of region in object */
7935 vm_map_copy_t *copy_result) /* OUT */
7936 {
7937 vm_map_copy_t copy; /* Resulting copy */
7938
7939 /*
7940 * We drop the object into a special copy object
7941 * that contains the object directly.
7942 */
7943
7944 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7945 copy->type = VM_MAP_COPY_OBJECT;
7946 copy->cpy_object = object;
7947 copy->offset = offset;
7948 copy->size = size;
7949
7950 *copy_result = copy;
7951 return(KERN_SUCCESS);
7952 }
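/*
 * Illustrative sketch (not part of the original source): a copy created
 * here has type VM_MAP_COPY_OBJECT and is typically consumed by
 * vm_map_copyout(), which enters the donated object directly into the
 * destination map. The caller donates one object reference; take an
 * extra reference first if it needs to keep its own:
 *
 *     vm_map_copy_t           copy;
 *     vm_map_address_t        dst_addr;
 *
 *     vm_object_reference(object);            /* reference to donate */
 *     vm_map_copyin_object(object, offset, size, &copy);
 *     vm_map_copyout(dst_map, &dst_addr, copy);
 */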
7953
7954 static void
7955 vm_map_fork_share(
7956 vm_map_t old_map,
7957 vm_map_entry_t old_entry,
7958 vm_map_t new_map)
7959 {
7960 vm_object_t object;
7961 vm_map_entry_t new_entry;
7962
7963 /*
7964 * New sharing code. New map entry
7965 * references original object. Internal
7966 * objects use asynchronous copy algorithm for
7967 * future copies. First make sure we have
7968 * the right object. If we need a shadow,
7969 * or someone else already has one, then
7970 * make a new shadow and share it.
7971 */
7972
7973 object = old_entry->object.vm_object;
7974 if (old_entry->is_sub_map) {
7975 assert(old_entry->wired_count == 0);
7976 #ifndef NO_NESTED_PMAP
7977 if(old_entry->use_pmap) {
7978 kern_return_t result;
7979
7980 result = pmap_nest(new_map->pmap,
7981 (old_entry->object.sub_map)->pmap,
7982 (addr64_t)old_entry->vme_start,
7983 (addr64_t)old_entry->vme_start,
7984 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
7985 if(result)
7986 panic("vm_map_fork_share: pmap_nest failed!");
7987 }
7988 #endif /* NO_NESTED_PMAP */
7989 } else if (object == VM_OBJECT_NULL) {
7990 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
7991 old_entry->vme_start));
7992 old_entry->offset = 0;
7993 old_entry->object.vm_object = object;
7994 assert(!old_entry->needs_copy);
7995 } else if (object->copy_strategy !=
7996 MEMORY_OBJECT_COPY_SYMMETRIC) {
7997
7998 /*
7999 * We are already using an asymmetric
8000 * copy, and therefore we already have
8001 * the right object.
8002 */
8003
8004 assert(! old_entry->needs_copy);
8005 }
8006 else if (old_entry->needs_copy || /* case 1 */
8007 object->shadowed || /* case 2 */
8008 (!object->true_share && /* case 3 */
8009 !old_entry->is_shared &&
8010 (object->vo_size >
8011 (vm_map_size_t)(old_entry->vme_end -
8012 old_entry->vme_start)))) {
8013
8014 /*
8015 * We need to create a shadow.
8016 * There are three cases here.
8017 * In the first case, we need to
8018 * complete a deferred symmetrical
8019 * copy that we participated in.
8020 * In the second and third cases,
8021 * we need to create the shadow so
8022 * that changes that we make to the
8023 * object do not interfere with
8024 * any symmetrical copies which
8025 * have occured (case 2) or which
8026 * might occur (case 3).
8027 *
8028 * The first case is when we had
8029 * deferred shadow object creation
8030 * via the entry->needs_copy mechanism.
8031 * This mechanism only works when
8032 * only one entry points to the source
8033 * object, and we are about to create
8034 * a second entry pointing to the
8035 * same object. The problem is that
8036 * there is no way of mapping from
8037 * an object to the entries pointing
8038 * to it. (Deferred shadow creation
8039 * works with one entry because it occurs
8040 * at fault time, and we walk from the
8041 * entry to the object when handling
8042 * the fault.)
8043 *
8044 * The second case is when the object
8045 * to be shared has already been copied
8046 * with a symmetric copy, but we point
8047 * directly to the object without
8048 * needs_copy set in our entry. (This
8049 * can happen because different ranges
8050 * of an object can be pointed to by
8051 * different entries. In particular,
8052 * a single entry pointing to an object
8053 * can be split by a call to vm_inherit,
8054 * which, combined with task_create, can
8055 * result in the different entries
8056 * having different needs_copy values.)
8057 * The shadowed flag in the object allows
8058 * us to detect this case. The problem
8059 * with this case is that if this object
8060 * has or will have shadows, then we
8061 * must not perform an asymmetric copy
8062 * of this object, since such a copy
8063 * allows the object to be changed, which
8064 * will break the previous symmetrical
8065 * copies (which rely upon the object
8066 * not changing). In a sense, the shadowed
8067 * flag says "don't change this object".
8068 * We fix this by creating a shadow
8069 * object for this object, and sharing
8070 * that. This works because we are free
8071 * to change the shadow object (and thus
8072 * to use an asymmetric copy strategy);
8073 * this is also semantically correct,
8074 * since this object is temporary, and
8075 * therefore a copy of the object is
8076 * as good as the object itself. (This
8077 * is not true for permanent objects,
8078 * since the pager needs to see changes,
8079 * which won't happen if the changes
8080 * are made to a copy.)
8081 *
8082 * The third case is when the object
8083 * to be shared has parts sticking
8084 * outside of the entry we're working
8085 * with, and thus may in the future
8086 * be subject to a symmetrical copy.
8087 * (This is a preemptive version of
8088 * case 2.)
8089 */
8090 vm_object_shadow(&old_entry->object.vm_object,
8091 &old_entry->offset,
8092 (vm_map_size_t) (old_entry->vme_end -
8093 old_entry->vme_start));
8094
8095 /*
8096 * If we're making a shadow for other than
8097 * copy-on-write reasons, then we have
8098 * to remove write permission.
8099 */
8100
8101 if (!old_entry->needs_copy &&
8102 (old_entry->protection & VM_PROT_WRITE)) {
8103 vm_prot_t prot;
8104
8105 prot = old_entry->protection & ~VM_PROT_WRITE;
8106
8107 if (override_nx(old_map, old_entry->alias) && prot)
8108 prot |= VM_PROT_EXECUTE;
8109
8110 if (old_map->mapped) {
8111 vm_object_pmap_protect(
8112 old_entry->object.vm_object,
8113 old_entry->offset,
8114 (old_entry->vme_end -
8115 old_entry->vme_start),
8116 PMAP_NULL,
8117 old_entry->vme_start,
8118 prot);
8119 } else {
8120 pmap_protect(old_map->pmap,
8121 old_entry->vme_start,
8122 old_entry->vme_end,
8123 prot);
8124 }
8125 }
8126
8127 old_entry->needs_copy = FALSE;
8128 object = old_entry->object.vm_object;
8129 }
8130
8131
8132 /*
8133 * If object was using a symmetric copy strategy,
8134 * change its copy strategy to the default
8135 * asymmetric copy strategy, which is copy_delay
8136 * in the non-norma case and copy_call in the
8137 * norma case. Bump the reference count for the
8138 * new entry.
8139 */
8140
8141 if(old_entry->is_sub_map) {
8142 vm_map_lock(old_entry->object.sub_map);
8143 vm_map_reference(old_entry->object.sub_map);
8144 vm_map_unlock(old_entry->object.sub_map);
8145 } else {
8146 vm_object_lock(object);
8147 vm_object_reference_locked(object);
8148 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
8149 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
8150 }
8151 vm_object_unlock(object);
8152 }
8153
8154 /*
8155 * Clone the entry, using object ref from above.
8156 * Mark both entries as shared.
8157 */
8158
8159 new_entry = vm_map_entry_create(new_map);
8160 vm_map_entry_copy(new_entry, old_entry);
8161 old_entry->is_shared = TRUE;
8162 new_entry->is_shared = TRUE;
8163
8164 /*
8165 * Insert the entry into the new map -- we
8166 * know we're inserting at the end of the new
8167 * map.
8168 */
8169
8170 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
8171
8172 /*
8173 * Update the physical map
8174 */
8175
8176 if (old_entry->is_sub_map) {
8177 /* Bill Angell pmap support goes here */
8178 } else {
8179 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
8180 old_entry->vme_end - old_entry->vme_start,
8181 old_entry->vme_start);
8182 }
8183 }
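/*
 * A minimal sketch of the shadow-creation decision above, written as a
 * hypothetical helper (not part of this file); the field names are taken
 * from the test in vm_map_fork_share() and carry the same three cases
 * spelled out in the long comment:
 */
static boolean_t
fork_share_needs_shadow(
	vm_map_entry_t	entry,
	vm_object_t	object)
{
	vm_map_size_t	entry_size = entry->vme_end - entry->vme_start;

	return (entry->needs_copy ||		 /* case 1: deferred symmetric copy */
		object->shadowed ||		 /* case 2: already symmetrically copied */
		(!object->true_share &&		 /* case 3: object extends beyond this */
		 !entry->is_shared &&		 /*	    entry, so other entries may   */
		 object->vo_size > entry_size)); /*	    copy it symmetrically later   */
}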
8184
8185 static boolean_t
8186 vm_map_fork_copy(
8187 vm_map_t old_map,
8188 vm_map_entry_t *old_entry_p,
8189 vm_map_t new_map)
8190 {
8191 vm_map_entry_t old_entry = *old_entry_p;
8192 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
8193 vm_map_offset_t start = old_entry->vme_start;
8194 vm_map_copy_t copy;
8195 vm_map_entry_t last = vm_map_last_entry(new_map);
8196
8197 vm_map_unlock(old_map);
8198 /*
8199 * Use maxprot version of copyin because we
8200 * care about whether this memory can ever
8201 * be accessed, not just whether it's accessible
8202 * right now.
8203 */
8204 if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, &copy)
8205 != KERN_SUCCESS) {
8206 /*
8207 * The map might have changed while it
8208 * was unlocked, check it again. Skip
8209 * any blank space or permanently
8210 * unreadable region.
8211 */
8212 vm_map_lock(old_map);
8213 if (!vm_map_lookup_entry(old_map, start, &last) ||
8214 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
8215 last = last->vme_next;
8216 }
8217 *old_entry_p = last;
8218
8219 /*
8220 * XXX For some error returns, want to
8221 * XXX skip to the next element. Note
8222 * that INVALID_ADDRESS and
8223 * PROTECTION_FAILURE are handled above.
8224 */
8225
8226 return FALSE;
8227 }
8228
8229 /*
8230 * Insert the copy into the new map
8231 */
8232
8233 vm_map_copy_insert(new_map, last, copy);
8234
8235 /*
8236 * Pick up the traversal at the end of
8237 * the copied region.
8238 */
8239
8240 vm_map_lock(old_map);
8241 start += entry_size;
8242 if (! vm_map_lookup_entry(old_map, start, &last)) {
8243 last = last->vme_next;
8244 } else {
8245 if (last->vme_start == start) {
8246 /*
8247 * No need to clip here and we don't
8248 * want to cause any unnecessary
8249 * unnesting...
8250 */
8251 } else {
8252 vm_map_clip_start(old_map, last, start);
8253 }
8254 }
8255 *old_entry_p = last;
8256
8257 return TRUE;
8258 }
8259
8260 /*
8261 * vm_map_fork:
8262 *
8263 * Create and return a new map based on the old
8264 * map, according to the inheritance values on the
8265 * regions in that map.
8266 *
8267 * The source map must not be locked.
8268 */
8269 vm_map_t
8270 vm_map_fork(
8271 vm_map_t old_map)
8272 {
8273 pmap_t new_pmap;
8274 vm_map_t new_map;
8275 vm_map_entry_t old_entry;
8276 vm_map_size_t new_size = 0, entry_size;
8277 vm_map_entry_t new_entry;
8278 boolean_t src_needs_copy;
8279 boolean_t new_entry_needs_copy;
8280
8281 new_pmap = pmap_create((vm_map_size_t) 0,
8282 #if defined(__i386__) || defined(__x86_64__)
8283 old_map->pmap->pm_task_map != TASK_MAP_32BIT
8284 #else
8285 0
8286 #endif
8287 );
8288 #if defined(__i386__)
8289 if (old_map->pmap->pm_task_map == TASK_MAP_64BIT_SHARED)
8290 pmap_set_4GB_pagezero(new_pmap);
8291 #endif
8292
8293 vm_map_reference_swap(old_map);
8294 vm_map_lock(old_map);
8295
8296 new_map = vm_map_create(new_pmap,
8297 old_map->min_offset,
8298 old_map->max_offset,
8299 old_map->hdr.entries_pageable);
8300 for (
8301 old_entry = vm_map_first_entry(old_map);
8302 old_entry != vm_map_to_entry(old_map);
8303 ) {
8304
8305 entry_size = old_entry->vme_end - old_entry->vme_start;
8306
8307 switch (old_entry->inheritance) {
8308 case VM_INHERIT_NONE:
8309 break;
8310
8311 case VM_INHERIT_SHARE:
8312 vm_map_fork_share(old_map, old_entry, new_map);
8313 new_size += entry_size;
8314 break;
8315
8316 case VM_INHERIT_COPY:
8317
8318 /*
8319 * Inline the copy_quickly case;
8320 * upon failure, fall back on call
8321 * to vm_map_fork_copy.
8322 */
8323
8324 if(old_entry->is_sub_map)
8325 break;
8326 if ((old_entry->wired_count != 0) ||
8327 ((old_entry->object.vm_object != NULL) &&
8328 (old_entry->object.vm_object->true_share))) {
8329 goto slow_vm_map_fork_copy;
8330 }
8331
8332 new_entry = vm_map_entry_create(new_map);
8333 vm_map_entry_copy(new_entry, old_entry);
8334 /* clear address space specifics */
8335 new_entry->use_pmap = FALSE;
8336
8337 if (! vm_object_copy_quickly(
8338 &new_entry->object.vm_object,
8339 old_entry->offset,
8340 (old_entry->vme_end -
8341 old_entry->vme_start),
8342 &src_needs_copy,
8343 &new_entry_needs_copy)) {
8344 vm_map_entry_dispose(new_map, new_entry);
8345 goto slow_vm_map_fork_copy;
8346 }
8347
8348 /*
8349 * Handle copy-on-write obligations
8350 */
8351
8352 if (src_needs_copy && !old_entry->needs_copy) {
8353 vm_prot_t prot;
8354
8355 prot = old_entry->protection & ~VM_PROT_WRITE;
8356
8357 if (override_nx(old_map, old_entry->alias) && prot)
8358 prot |= VM_PROT_EXECUTE;
8359
8360 vm_object_pmap_protect(
8361 old_entry->object.vm_object,
8362 old_entry->offset,
8363 (old_entry->vme_end -
8364 old_entry->vme_start),
8365 ((old_entry->is_shared
8366 || old_map->mapped)
8367 ? PMAP_NULL :
8368 old_map->pmap),
8369 old_entry->vme_start,
8370 prot);
8371
8372 old_entry->needs_copy = TRUE;
8373 }
8374 new_entry->needs_copy = new_entry_needs_copy;
8375
8376 /*
8377 * Insert the entry at the end
8378 * of the map.
8379 */
8380
8381 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map),
8382 new_entry);
8383 new_size += entry_size;
8384 break;
8385
8386 slow_vm_map_fork_copy:
8387 if (vm_map_fork_copy(old_map, &old_entry, new_map)) {
8388 new_size += entry_size;
8389 }
8390 continue;
8391 }
8392 old_entry = old_entry->vme_next;
8393 }
8394
8395 new_map->size = new_size;
8396 vm_map_unlock(old_map);
8397 vm_map_deallocate(old_map);
8398
8399 return(new_map);
8400 }
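/*
 * A minimal user-space sketch (not kernel code) of how the inheritance
 * values consumed by vm_map_fork() are established: a region marked
 * VM_INHERIT_SHARE goes through vm_map_fork_share() at fork time, while
 * the default VM_INHERIT_COPY takes the copy path above.  Assumes the
 * standard Mach user-level headers.
 */
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <unistd.h>

static int
share_page_with_child(void)
{
	mach_vm_address_t	addr = 0;
	mach_vm_size_t		size = 4096;

	if (mach_vm_allocate(mach_task_self(), &addr, size,
			     VM_FLAGS_ANYWHERE) != KERN_SUCCESS)
		return -1;
	/* parent and child will see each other's writes to this page */
	if (mach_vm_inherit(mach_task_self(), addr, size,
			    VM_INHERIT_SHARE) != KERN_SUCCESS)
		return -1;
	return fork();	/* the child's map is built by vm_map_fork() */
}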
8401
8402 /*
8403 * vm_map_exec:
8404 *
8405 * Set up the "new_map" with the proper execution environment according
8406 * to the type of executable (platform, 64-bit, chroot environment).
8407 * Map the comm page and shared region, etc...
8408 */
8409 kern_return_t
8410 vm_map_exec(
8411 vm_map_t new_map,
8412 task_t task,
8413 void *fsroot,
8414 cpu_type_t cpu)
8415 {
8416 SHARED_REGION_TRACE_DEBUG(
8417 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
8418 current_task(), new_map, task, fsroot, cpu));
8419 (void) vm_commpage_enter(new_map, task);
8420 (void) vm_shared_region_enter(new_map, task, fsroot, cpu);
8421 SHARED_REGION_TRACE_DEBUG(
8422 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
8423 current_task(), new_map, task, fsroot, cpu));
8424 return KERN_SUCCESS;
8425 }
8426
8427 /*
8428 * vm_map_lookup_locked:
8429 *
8430 * Finds the VM object, offset, and
8431 * protection for a given virtual address in the
8432 * specified map, assuming a page fault of the
8433 * type specified.
8434 *
8435 * Returns the (object, offset, protection) for
8436 * this address, whether it is wired down, and whether
8437 * this map has the only reference to the data in question.
8438 * In order to later verify this lookup, a "version"
8439 * is returned.
8440 *
8441 * The map MUST be locked by the caller and WILL be
8442 * locked on exit. In order to guarantee the
8443 * existence of the returned object, it is returned
8444 * locked.
8445 *
8446 * If a lookup is requested with "write protection"
8447 * specified, the map may be changed to perform virtual
8448 * copying operations, although the data referenced will
8449 * remain the same.
8450 */
8451 kern_return_t
8452 vm_map_lookup_locked(
8453 vm_map_t *var_map, /* IN/OUT */
8454 vm_map_offset_t vaddr,
8455 vm_prot_t fault_type,
8456 int object_lock_type,
8457 vm_map_version_t *out_version, /* OUT */
8458 vm_object_t *object, /* OUT */
8459 vm_object_offset_t *offset, /* OUT */
8460 vm_prot_t *out_prot, /* OUT */
8461 boolean_t *wired, /* OUT */
8462 vm_object_fault_info_t fault_info, /* OUT */
8463 vm_map_t *real_map)
8464 {
8465 vm_map_entry_t entry;
8466 register vm_map_t map = *var_map;
8467 vm_map_t old_map = *var_map;
8468 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
8469 vm_map_offset_t cow_parent_vaddr = 0;
8470 vm_map_offset_t old_start = 0;
8471 vm_map_offset_t old_end = 0;
8472 register vm_prot_t prot;
8473 boolean_t mask_protections;
8474 vm_prot_t original_fault_type;
8475
8476 /*
8477 * VM_PROT_IS_MASK means that the caller wants us to use "fault_type"
8478 * as a mask against the mapping's actual protections, not as an
8479 * absolute value.
8480 */
8481 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
8482 fault_type &= ~VM_PROT_IS_MASK;
8483 original_fault_type = fault_type;
8484
8485 *real_map = map;
8486
8487 RetryLookup:
8488 fault_type = original_fault_type;
8489
8490 /*
8491 * If the map has an interesting hint, try it before calling
8492 * full blown lookup routine.
8493 */
8494 entry = map->hint;
8495
8496 if ((entry == vm_map_to_entry(map)) ||
8497 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
8498 vm_map_entry_t tmp_entry;
8499
8500 /*
8501 * Entry was either not a valid hint, or the vaddr
8502 * was not contained in the entry, so do a full lookup.
8503 */
8504 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
8505 if((cow_sub_map_parent) && (cow_sub_map_parent != map))
8506 vm_map_unlock(cow_sub_map_parent);
8507 if((*real_map != map)
8508 && (*real_map != cow_sub_map_parent))
8509 vm_map_unlock(*real_map);
8510 return KERN_INVALID_ADDRESS;
8511 }
8512
8513 entry = tmp_entry;
8514 }
8515 if(map == old_map) {
8516 old_start = entry->vme_start;
8517 old_end = entry->vme_end;
8518 }
8519
8520 /*
8521 * Handle submaps. Drop lock on upper map, submap is
8522 * returned locked.
8523 */
8524
8525 submap_recurse:
8526 if (entry->is_sub_map) {
8527 vm_map_offset_t local_vaddr;
8528 vm_map_offset_t end_delta;
8529 vm_map_offset_t start_delta;
8530 vm_map_entry_t submap_entry;
8531 boolean_t mapped_needs_copy=FALSE;
8532
8533 local_vaddr = vaddr;
8534
8535 if ((entry->use_pmap && !(fault_type & VM_PROT_WRITE))) {
8536 /* if real_map equals map we unlock below */
8537 if ((*real_map != map) &&
8538 (*real_map != cow_sub_map_parent))
8539 vm_map_unlock(*real_map);
8540 *real_map = entry->object.sub_map;
8541 }
8542
8543 if(entry->needs_copy && (fault_type & VM_PROT_WRITE)) {
8544 if (!mapped_needs_copy) {
8545 if (vm_map_lock_read_to_write(map)) {
8546 vm_map_lock_read(map);
8547 /* XXX FBDP: entry still valid ? */
8548 if(*real_map == entry->object.sub_map)
8549 *real_map = map;
8550 goto RetryLookup;
8551 }
8552 vm_map_lock_read(entry->object.sub_map);
8553 cow_sub_map_parent = map;
8554 /* reset base to map before cow object */
8555 /* this is the map which will accept */
8556 /* the new cow object */
8557 old_start = entry->vme_start;
8558 old_end = entry->vme_end;
8559 cow_parent_vaddr = vaddr;
8560 mapped_needs_copy = TRUE;
8561 } else {
8562 vm_map_lock_read(entry->object.sub_map);
8563 if((cow_sub_map_parent != map) &&
8564 (*real_map != map))
8565 vm_map_unlock(map);
8566 }
8567 } else {
8568 vm_map_lock_read(entry->object.sub_map);
8569 /* Leave the map locked if it is the */
8570 /* target COW sub_map parent from above; */
8571 /* otherwise just follow the maps down to */
8572 /* the object. Here we unlock, knowing we */
8573 /* are not revisiting the map. */
8574 if((*real_map != map) && (map != cow_sub_map_parent))
8575 vm_map_unlock_read(map);
8576 }
8577
8578 /* XXX FBDP: map has been unlocked, what protects "entry" !? */
8579 *var_map = map = entry->object.sub_map;
8580
8581 /* calculate the offset in the submap for vaddr */
8582 local_vaddr = (local_vaddr - entry->vme_start) + entry->offset;
8583
8584 RetrySubMap:
8585 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
8586 if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
8587 vm_map_unlock(cow_sub_map_parent);
8588 }
8589 if((*real_map != map)
8590 && (*real_map != cow_sub_map_parent)) {
8591 vm_map_unlock(*real_map);
8592 }
8593 *real_map = map;
8594 return KERN_INVALID_ADDRESS;
8595 }
8596
8597 /* find the attenuated shadow of the underlying object */
8598 /* on our target map */
8599
8600 /* In English: the submap object may extend beyond the */
8601 /* region mapped by the entry, or may fill only a portion */
8602 /* of it. For our purposes, we only care if the object */
8603 /* doesn't fill the entry. In that case the area which will */
8604 /* ultimately be clipped in the top map only needs to be */
8605 /* as big as the portion of the underlying entry which is */
8606 /* actually mapped. */
8607 start_delta = submap_entry->vme_start > entry->offset ?
8608 submap_entry->vme_start - entry->offset : 0;
8609
8610 end_delta =
8611 (entry->offset + start_delta + (old_end - old_start)) <=
8612 submap_entry->vme_end ?
8613 0 : (entry->offset +
8614 (old_end - old_start))
8615 - submap_entry->vme_end;
8616
8617 old_start += start_delta;
8618 old_end -= end_delta;
8619
8620 if(submap_entry->is_sub_map) {
8621 entry = submap_entry;
8622 vaddr = local_vaddr;
8623 goto submap_recurse;
8624 }
8625
8626 if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) {
8627
8628 vm_object_t sub_object, copy_object;
8629 vm_object_offset_t copy_offset;
8630 vm_map_offset_t local_start;
8631 vm_map_offset_t local_end;
8632 boolean_t copied_slowly = FALSE;
8633
8634 if (vm_map_lock_read_to_write(map)) {
8635 vm_map_lock_read(map);
8636 old_start -= start_delta;
8637 old_end += end_delta;
8638 goto RetrySubMap;
8639 }
8640
8641
8642 sub_object = submap_entry->object.vm_object;
8643 if (sub_object == VM_OBJECT_NULL) {
8644 sub_object =
8645 vm_object_allocate(
8646 (vm_map_size_t)
8647 (submap_entry->vme_end -
8648 submap_entry->vme_start));
8649 submap_entry->object.vm_object = sub_object;
8650 submap_entry->offset = 0;
8651 }
8652 local_start = local_vaddr -
8653 (cow_parent_vaddr - old_start);
8654 local_end = local_vaddr +
8655 (old_end - cow_parent_vaddr);
8656 vm_map_clip_start(map, submap_entry, local_start);
8657 vm_map_clip_end(map, submap_entry, local_end);
8658 /* unnesting was done in vm_map_clip_start/end() */
8659 assert(!submap_entry->use_pmap);
8660
8661 /* This is the COW case; let's connect */
8662 /* an entry in our space to the underlying */
8663 /* object in the submap, bypassing the */
8664 /* submap. */
8665
8666
8667 if(submap_entry->wired_count != 0 ||
8668 (sub_object->copy_strategy ==
8669 MEMORY_OBJECT_COPY_NONE)) {
8670 vm_object_lock(sub_object);
8671 vm_object_copy_slowly(sub_object,
8672 submap_entry->offset,
8673 (submap_entry->vme_end -
8674 submap_entry->vme_start),
8675 FALSE,
8676 &copy_object);
8677 copied_slowly = TRUE;
8678 } else {
8679
8680 /* set up shadow object */
8681 copy_object = sub_object;
8682 vm_object_reference(copy_object);
8683 sub_object->shadowed = TRUE;
8684 submap_entry->needs_copy = TRUE;
8685
8686 prot = submap_entry->protection & ~VM_PROT_WRITE;
8687
8688 if (override_nx(map, submap_entry->alias) && prot)
8689 prot |= VM_PROT_EXECUTE;
8690
8691 vm_object_pmap_protect(
8692 sub_object,
8693 submap_entry->offset,
8694 submap_entry->vme_end -
8695 submap_entry->vme_start,
8696 (submap_entry->is_shared
8697 || map->mapped) ?
8698 PMAP_NULL : map->pmap,
8699 submap_entry->vme_start,
8700 prot);
8701 }
8702
8703 /*
8704 * Adjust the fault offset to the submap entry.
8705 */
8706 copy_offset = (local_vaddr -
8707 submap_entry->vme_start +
8708 submap_entry->offset);
8709
8710 /* This works differently from the */
8711 /* normal submap case. We go back */
8712 /* to the parent of the COW map and */
8713 /* clip out the target portion of */
8714 /* the sub_map, substituting the */
8715 /* new copy object. */
8716
8717 vm_map_unlock(map);
8718 local_start = old_start;
8719 local_end = old_end;
8720 map = cow_sub_map_parent;
8721 *var_map = cow_sub_map_parent;
8722 vaddr = cow_parent_vaddr;
8723 cow_sub_map_parent = NULL;
8724
8725 if(!vm_map_lookup_entry(map,
8726 vaddr, &entry)) {
8727 vm_object_deallocate(
8728 copy_object);
8729 vm_map_lock_write_to_read(map);
8730 return KERN_INVALID_ADDRESS;
8731 }
8732
8733 /* clip out the portion of space */
8734 /* mapped by the sub map which */
8735 /* corresponds to the underlying */
8736 /* object */
8737
8738 /*
8739 * Clip (and unnest) the smallest nested chunk
8740 * possible around the faulting address...
8741 */
8742 local_start = vaddr & ~(pmap_nesting_size_min - 1);
8743 local_end = local_start + pmap_nesting_size_min;
8744 /*
8745 * ... but don't go beyond the "old_start" to "old_end"
8746 * range, to avoid spanning over another VM region
8747 * with a possibly different VM object and/or offset.
8748 */
8749 if (local_start < old_start) {
8750 local_start = old_start;
8751 }
8752 if (local_end > old_end) {
8753 local_end = old_end;
8754 }
8755 /*
8756 * Adjust copy_offset to the start of the range.
8757 */
8758 copy_offset -= (vaddr - local_start);
8759
8760 vm_map_clip_start(map, entry, local_start);
8761 vm_map_clip_end(map, entry, local_end);
8762 /* unnesting was done in vm_map_clip_start/end() */
8763 assert(!entry->use_pmap);
8764
8765 /* substitute copy object for */
8766 /* shared map entry */
8767 vm_map_deallocate(entry->object.sub_map);
8768 entry->is_sub_map = FALSE;
8769 entry->object.vm_object = copy_object;
8770
8771 /* propagate the submap entry's protections */
8772 entry->protection |= submap_entry->protection;
8773 entry->max_protection |= submap_entry->max_protection;
8774
8775 if(copied_slowly) {
8776 entry->offset = local_start - old_start;
8777 entry->needs_copy = FALSE;
8778 entry->is_shared = FALSE;
8779 } else {
8780 entry->offset = copy_offset;
8781 entry->needs_copy = TRUE;
8782 if(entry->inheritance == VM_INHERIT_SHARE)
8783 entry->inheritance = VM_INHERIT_COPY;
8784 if (map != old_map)
8785 entry->is_shared = TRUE;
8786 }
8787 if(entry->inheritance == VM_INHERIT_SHARE)
8788 entry->inheritance = VM_INHERIT_COPY;
8789
8790 vm_map_lock_write_to_read(map);
8791 } else {
8792 if((cow_sub_map_parent)
8793 && (cow_sub_map_parent != *real_map)
8794 && (cow_sub_map_parent != map)) {
8795 vm_map_unlock(cow_sub_map_parent);
8796 }
8797 entry = submap_entry;
8798 vaddr = local_vaddr;
8799 }
8800 }
8801
8802 /*
8803 * Check whether this task is allowed to have
8804 * this page.
8805 */
8806
8807 prot = entry->protection;
8808
8809 if (override_nx(map, entry->alias) && prot) {
8810 /*
8811 * HACK -- if not a stack, then allow execution
8812 */
8813 prot |= VM_PROT_EXECUTE;
8814 }
8815
8816 if (mask_protections) {
8817 fault_type &= prot;
8818 if (fault_type == VM_PROT_NONE) {
8819 goto protection_failure;
8820 }
8821 }
8822 if ((fault_type & (prot)) != fault_type) {
8823 protection_failure:
8824 if (*real_map != map) {
8825 vm_map_unlock(*real_map);
8826 }
8827 *real_map = map;
8828
8829 if ((fault_type & VM_PROT_EXECUTE) && prot)
8830 log_stack_execution_failure((addr64_t)vaddr, prot);
8831
8832 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
8833 return KERN_PROTECTION_FAILURE;
8834 }
8835
8836 /*
8837 * If this page is not pageable, we have to get
8838 * it for all possible accesses.
8839 */
8840
8841 *wired = (entry->wired_count != 0);
8842 if (*wired)
8843 fault_type = prot;
8844
8845 /*
8846 * If the entry was copy-on-write, we either resolve the copy now or demote the permissions.
8847 */
8848
8849 if (entry->needs_copy) {
8850 /*
8851 * If we want to write the page, we may as well
8852 * handle that now since we've got the map locked.
8853 *
8854 * If we don't need to write the page, we just
8855 * demote the permissions allowed.
8856 */
8857
8858 if ((fault_type & VM_PROT_WRITE) || *wired) {
8859 /*
8860 * Make a new object, and place it in the
8861 * object chain. Note that no new references
8862 * have appeared -- one just moved from the
8863 * map to the new object.
8864 */
8865
8866 if (vm_map_lock_read_to_write(map)) {
8867 vm_map_lock_read(map);
8868 goto RetryLookup;
8869 }
8870 vm_object_shadow(&entry->object.vm_object,
8871 &entry->offset,
8872 (vm_map_size_t) (entry->vme_end -
8873 entry->vme_start));
8874
8875 entry->object.vm_object->shadowed = TRUE;
8876 entry->needs_copy = FALSE;
8877 vm_map_lock_write_to_read(map);
8878 }
8879 else {
8880 /*
8881 * We're attempting to read a copy-on-write
8882 * page -- don't allow writes.
8883 */
8884
8885 prot &= (~VM_PROT_WRITE);
8886 }
8887 }
8888
8889 /*
8890 * Create an object if necessary.
8891 */
8892 if (entry->object.vm_object == VM_OBJECT_NULL) {
8893
8894 if (vm_map_lock_read_to_write(map)) {
8895 vm_map_lock_read(map);
8896 goto RetryLookup;
8897 }
8898
8899 entry->object.vm_object = vm_object_allocate(
8900 (vm_map_size_t)(entry->vme_end - entry->vme_start));
8901 entry->offset = 0;
8902 vm_map_lock_write_to_read(map);
8903 }
8904
8905 /*
8906 * Return the object/offset from this entry. If the entry
8907 * was copy-on-write or empty, it has been fixed up. Also
8908 * return the protection.
8909 */
8910
8911 *offset = (vaddr - entry->vme_start) + entry->offset;
8912 *object = entry->object.vm_object;
8913 *out_prot = prot;
8914
8915 if (fault_info) {
8916 fault_info->interruptible = THREAD_UNINT; /* for now... */
8917 /* ... the caller will change "interruptible" if needed */
8918 fault_info->cluster_size = 0;
8919 fault_info->user_tag = entry->alias;
8920 fault_info->behavior = entry->behavior;
8921 fault_info->lo_offset = entry->offset;
8922 fault_info->hi_offset = (entry->vme_end - entry->vme_start) + entry->offset;
8923 fault_info->no_cache = entry->no_cache;
8924 fault_info->stealth = FALSE;
8925 fault_info->io_sync = FALSE;
8926 fault_info->cs_bypass = (entry->used_for_jit)? TRUE : FALSE;
8927 fault_info->mark_zf_absent = FALSE;
8928 }
8929
8930 /*
8931 * Lock the object to prevent it from disappearing
8932 */
8933 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
8934 vm_object_lock(*object);
8935 else
8936 vm_object_lock_shared(*object);
8937
8938 /*
8939 * Save the version number
8940 */
8941
8942 out_version->main_timestamp = map->timestamp;
8943
8944 return KERN_SUCCESS;
8945 }
8946
8947
8948 /*
8949 * vm_map_verify:
8950 *
8951 * Verifies that the map in question has not changed
8952 * since the given version. If successful, the map
8953 * will not change until vm_map_verify_done() is called.
8954 */
8955 boolean_t
8956 vm_map_verify(
8957 register vm_map_t map,
8958 register vm_map_version_t *version) /* REF */
8959 {
8960 boolean_t result;
8961
8962 vm_map_lock_read(map);
8963 result = (map->timestamp == version->main_timestamp);
8964
8965 if (!result)
8966 vm_map_unlock_read(map);
8967
8968 return(result);
8969 }
8970
8971 /*
8972 * vm_map_verify_done:
8973 *
8974 * Releases locks acquired by a vm_map_verify.
8975 *
8976 * This is now a macro in vm/vm_map.h. It does a
8977 * vm_map_unlock_read on the map.
8978 */
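/*
 * A minimal sketch (hypothetical kernel caller, not part of this file)
 * of the lookup/verify protocol described above: look the address up
 * with the map read-locked, remember the returned version, drop the
 * locks while doing slow work, then re-validate with vm_map_verify()
 * before trusting the result.
 */
static kern_return_t
lookup_then_verify(vm_map_t map, vm_map_offset_t vaddr)
{
	vm_map_version_t	version;
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_prot_t		prot;
	boolean_t		wired;
	vm_map_t		real_map;
	kern_return_t		kr;

	vm_map_lock_read(map);
	kr = vm_map_lookup_locked(&map, vaddr, VM_PROT_READ,
				  OBJECT_LOCK_EXCLUSIVE, &version,
				  &object, &offset, &prot, &wired,
				  NULL, &real_map);
	if (kr != KERN_SUCCESS) {
		/* the (possibly updated) map is still read-locked */
		vm_map_unlock_read(map);
		return kr;
	}
	vm_object_unlock(object);
	if (real_map != map)
		vm_map_unlock(real_map);
	vm_map_unlock_read(map);

	/* ... slow work (e.g. paging) happens with the map unlocked ... */

	if (!vm_map_verify(map, &version)) {
		/* the map changed underneath us: the caller must retry */
		return KERN_ABORTED;
	}
	vm_map_verify_done(map, &version);	/* drops the read lock */
	return KERN_SUCCESS;
}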
8979
8980
8981 /*
8982 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
8983 * Goes away after regular vm_region_recurse function migrates to
8984 * 64 bits
8985 * vm_region_recurse: A form of vm_region which follows the
8986 * submaps in a target map
8987 *
8988 */
8989
8990 kern_return_t
8991 vm_map_region_recurse_64(
8992 vm_map_t map,
8993 vm_map_offset_t *address, /* IN/OUT */
8994 vm_map_size_t *size, /* OUT */
8995 natural_t *nesting_depth, /* IN/OUT */
8996 vm_region_submap_info_64_t submap_info, /* IN/OUT */
8997 mach_msg_type_number_t *count) /* IN/OUT */
8998 {
8999 vm_region_extended_info_data_t extended;
9000 vm_map_entry_t tmp_entry;
9001 vm_map_offset_t user_address;
9002 unsigned int user_max_depth;
9003
9004 /*
9005 * "curr_entry" is the VM map entry preceding or including the
9006 * address we're looking for.
9007 * "curr_map" is the map or sub-map containing "curr_entry".
9008 * "curr_address" is the equivalent of the top map's "user_address"
9009 * in the current map.
9010 * "curr_offset" is the cumulated offset of "curr_map" in the
9011 * target task's address space.
9012 * "curr_depth" is the depth of "curr_map" in the chain of
9013 * sub-maps.
9014 *
9015 * "curr_max_below" and "curr_max_above" limit the range (around
9016 * "curr_address") we should take into account in the current (sub)map.
9017 * They limit the range to what's visible through the map entries
9018 * we've traversed from the top map to the current map.
9019 *
9020 */
9021 vm_map_entry_t curr_entry;
9022 vm_map_address_t curr_address;
9023 vm_map_offset_t curr_offset;
9024 vm_map_t curr_map;
9025 unsigned int curr_depth;
9026 vm_map_offset_t curr_max_below, curr_max_above;
9027 vm_map_offset_t curr_skip;
9028
9029 /*
9030 * "next_" is the same as "curr_" but for the VM region immediately
9031 * after the address we're looking for. We need to keep track of this
9032 * too because we want to return info about that region if the
9033 * address we're looking for is not mapped.
9034 */
9035 vm_map_entry_t next_entry;
9036 vm_map_offset_t next_offset;
9037 vm_map_offset_t next_address;
9038 vm_map_t next_map;
9039 unsigned int next_depth;
9040 vm_map_offset_t next_max_below, next_max_above;
9041 vm_map_offset_t next_skip;
9042
9043 boolean_t look_for_pages;
9044 vm_region_submap_short_info_64_t short_info;
9045
9046 if (map == VM_MAP_NULL) {
9047 /* no address space to work on */
9048 return KERN_INVALID_ARGUMENT;
9049 }
9050
9051 if (*count < VM_REGION_SUBMAP_INFO_COUNT_64) {
9052 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
9053 /*
9054 * "info" structure is not big enough and
9055 * would overflow
9056 */
9057 return KERN_INVALID_ARGUMENT;
9058 } else {
9059 look_for_pages = FALSE;
9060 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
9061 short_info = (vm_region_submap_short_info_64_t) submap_info;
9062 submap_info = NULL;
9063 }
9064 } else {
9065 look_for_pages = TRUE;
9066 *count = VM_REGION_SUBMAP_INFO_COUNT_64;
9067 short_info = NULL;
9068 }
9069
9070
9071 user_address = *address;
9072 user_max_depth = *nesting_depth;
9073
9074 curr_entry = NULL;
9075 curr_map = map;
9076 curr_address = user_address;
9077 curr_offset = 0;
9078 curr_skip = 0;
9079 curr_depth = 0;
9080 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
9081 curr_max_below = curr_address;
9082
9083 next_entry = NULL;
9084 next_map = NULL;
9085 next_address = 0;
9086 next_offset = 0;
9087 next_skip = 0;
9088 next_depth = 0;
9089 next_max_above = (vm_map_offset_t) -1;
9090 next_max_below = (vm_map_offset_t) -1;
9091
9092 if (not_in_kdp) {
9093 vm_map_lock_read(curr_map);
9094 }
9095
9096 for (;;) {
9097 if (vm_map_lookup_entry(curr_map,
9098 curr_address,
9099 &tmp_entry)) {
9100 /* tmp_entry contains the address we're looking for */
9101 curr_entry = tmp_entry;
9102 } else {
9103 vm_map_offset_t skip;
9104 /*
9105 * The address is not mapped. "tmp_entry" is the
9106 * map entry preceding the address. We want the next
9107 * one, if it exists.
9108 */
9109 curr_entry = tmp_entry->vme_next;
9110
9111 if (curr_entry == vm_map_to_entry(curr_map) ||
9112 (curr_entry->vme_start >=
9113 curr_address + curr_max_above)) {
9114 /* no next entry at this level: stop looking */
9115 if (not_in_kdp) {
9116 vm_map_unlock_read(curr_map);
9117 }
9118 curr_entry = NULL;
9119 curr_map = NULL;
9120 curr_offset = 0;
9121 curr_depth = 0;
9122 curr_max_above = 0;
9123 curr_max_below = 0;
9124 break;
9125 }
9126
9127 /* adjust current address and offset */
9128 skip = curr_entry->vme_start - curr_address;
9129 curr_address = curr_entry->vme_start;
9130 curr_skip = skip;
9131 curr_offset += skip;
9132 curr_max_above -= skip;
9133 curr_max_below = 0;
9134 }
9135
9136 /*
9137 * Is the next entry at this level closer to the address (or
9138 * deeper in the submap chain) than the one we had
9139 * so far ?
9140 */
9141 tmp_entry = curr_entry->vme_next;
9142 if (tmp_entry == vm_map_to_entry(curr_map)) {
9143 /* no next entry at this level */
9144 } else if (tmp_entry->vme_start >=
9145 curr_address + curr_max_above) {
9146 /*
9147 * tmp_entry is beyond the scope of what we mapped of
9148 * this submap in the upper level: ignore it.
9149 */
9150 } else if ((next_entry == NULL) ||
9151 (tmp_entry->vme_start + curr_offset <=
9152 next_entry->vme_start + next_offset)) {
9153 /*
9154 * We didn't have a "next_entry" or this one is
9155 * closer to the address we're looking for:
9156 * use this "tmp_entry" as the new "next_entry".
9157 */
9158 if (next_entry != NULL) {
9159 /* unlock the last "next_map" */
9160 if (next_map != curr_map && not_in_kdp) {
9161 vm_map_unlock_read(next_map);
9162 }
9163 }
9164 next_entry = tmp_entry;
9165 next_map = curr_map;
9166 next_depth = curr_depth;
9167 next_address = next_entry->vme_start;
9168 next_skip = curr_skip;
9169 next_offset = curr_offset;
9170 next_offset += (next_address - curr_address);
9171 next_max_above = MIN(next_max_above, curr_max_above);
9172 next_max_above = MIN(next_max_above,
9173 next_entry->vme_end - next_address);
9174 next_max_below = MIN(next_max_below, curr_max_below);
9175 next_max_below = MIN(next_max_below,
9176 next_address - next_entry->vme_start);
9177 }
9178
9179 /*
9180 * "curr_max_{above,below}" allow us to keep track of the
9181 * portion of the submap that is actually mapped at this level:
9182 * the rest of that submap is irrelevant to us, since it's not
9183 * mapped here.
9184 * The relevant portion of the map starts at
9185 * "curr_entry->offset" up to the size of "curr_entry".
9186 */
9187 curr_max_above = MIN(curr_max_above,
9188 curr_entry->vme_end - curr_address);
9189 curr_max_below = MIN(curr_max_below,
9190 curr_address - curr_entry->vme_start);
9191
9192 if (!curr_entry->is_sub_map ||
9193 curr_depth >= user_max_depth) {
9194 /*
9195 * We hit a leaf map or we reached the maximum depth
9196 * we could, so stop looking. Keep the current map
9197 * locked.
9198 */
9199 break;
9200 }
9201
9202 /*
9203 * Get down to the next submap level.
9204 */
9205
9206 /*
9207 * Lock the next level and unlock the current level,
9208 * unless we need to keep it locked to access the "next_entry"
9209 * later.
9210 */
9211 if (not_in_kdp) {
9212 vm_map_lock_read(curr_entry->object.sub_map);
9213 }
9214 if (curr_map == next_map) {
9215 /* keep "next_map" locked in case we need it */
9216 } else {
9217 /* release this map */
9218 if (not_in_kdp)
9219 vm_map_unlock_read(curr_map);
9220 }
9221
9222 /*
9223 * Adjust the offset. "curr_entry" maps the submap
9224 * at relative address "curr_entry->vme_start" in the
9225 * curr_map but skips the first "curr_entry->offset"
9226 * bytes of the submap.
9227 * "curr_offset" always represents the offset of a virtual
9228 * address in the curr_map relative to the absolute address
9229 * space (i.e. the top-level VM map).
9230 */
9231 curr_offset +=
9232 (curr_entry->offset - curr_entry->vme_start);
9233 curr_address = user_address + curr_offset;
9234 /* switch to the submap */
9235 curr_map = curr_entry->object.sub_map;
9236 curr_depth++;
9237 curr_entry = NULL;
9238 }
9239
9240 if (curr_entry == NULL) {
9241 /* no VM region contains the address... */
9242 if (next_entry == NULL) {
9243 /* ... and no VM region follows it either */
9244 return KERN_INVALID_ADDRESS;
9245 }
9246 /* ... gather info about the next VM region */
9247 curr_entry = next_entry;
9248 curr_map = next_map; /* still locked ... */
9249 curr_address = next_address;
9250 curr_skip = next_skip;
9251 curr_offset = next_offset;
9252 curr_depth = next_depth;
9253 curr_max_above = next_max_above;
9254 curr_max_below = next_max_below;
9255 if (curr_map == map) {
9256 user_address = curr_address;
9257 }
9258 } else {
9259 /* we won't need "next_entry" after all */
9260 if (next_entry != NULL) {
9261 /* release "next_map" */
9262 if (next_map != curr_map && not_in_kdp) {
9263 vm_map_unlock_read(next_map);
9264 }
9265 }
9266 }
9267 next_entry = NULL;
9268 next_map = NULL;
9269 next_offset = 0;
9270 next_skip = 0;
9271 next_depth = 0;
9272 next_max_below = -1;
9273 next_max_above = -1;
9274
9275 *nesting_depth = curr_depth;
9276 *size = curr_max_above + curr_max_below;
9277 *address = user_address + curr_skip - curr_max_below;
9278
9279 // LP64todo: all the current tools are 32-bit, so this has never worked for 64-bit;
9280 // it should probably be a real 32-bit ID rather than a truncated pointer.
9281 // Current users only check for equality.
9282 #define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)p)
9283
9284 if (look_for_pages) {
9285 submap_info->user_tag = curr_entry->alias;
9286 submap_info->offset = curr_entry->offset;
9287 submap_info->protection = curr_entry->protection;
9288 submap_info->inheritance = curr_entry->inheritance;
9289 submap_info->max_protection = curr_entry->max_protection;
9290 submap_info->behavior = curr_entry->behavior;
9291 submap_info->user_wired_count = curr_entry->user_wired_count;
9292 submap_info->is_submap = curr_entry->is_sub_map;
9293 submap_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
9294 } else {
9295 short_info->user_tag = curr_entry->alias;
9296 short_info->offset = curr_entry->offset;
9297 short_info->protection = curr_entry->protection;
9298 short_info->inheritance = curr_entry->inheritance;
9299 short_info->max_protection = curr_entry->max_protection;
9300 short_info->behavior = curr_entry->behavior;
9301 short_info->user_wired_count = curr_entry->user_wired_count;
9302 short_info->is_submap = curr_entry->is_sub_map;
9303 short_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
9304 }
9305
9306 extended.pages_resident = 0;
9307 extended.pages_swapped_out = 0;
9308 extended.pages_shared_now_private = 0;
9309 extended.pages_dirtied = 0;
9310 extended.external_pager = 0;
9311 extended.shadow_depth = 0;
9312
9313 if (not_in_kdp) {
9314 if (!curr_entry->is_sub_map) {
9315 vm_map_offset_t range_start, range_end;
9316 range_start = MAX((curr_address - curr_max_below),
9317 curr_entry->vme_start);
9318 range_end = MIN((curr_address + curr_max_above),
9319 curr_entry->vme_end);
9320 vm_map_region_walk(curr_map,
9321 range_start,
9322 curr_entry,
9323 (curr_entry->offset +
9324 (range_start -
9325 curr_entry->vme_start)),
9326 range_end - range_start,
9327 &extended,
9328 look_for_pages);
9329 if (extended.external_pager &&
9330 extended.ref_count == 2 &&
9331 extended.share_mode == SM_SHARED) {
9332 extended.share_mode = SM_PRIVATE;
9333 }
9334 } else {
9335 if (curr_entry->use_pmap) {
9336 extended.share_mode = SM_TRUESHARED;
9337 } else {
9338 extended.share_mode = SM_PRIVATE;
9339 }
9340 extended.ref_count =
9341 curr_entry->object.sub_map->ref_count;
9342 }
9343 }
9344
9345 if (look_for_pages) {
9346 submap_info->pages_resident = extended.pages_resident;
9347 submap_info->pages_swapped_out = extended.pages_swapped_out;
9348 submap_info->pages_shared_now_private =
9349 extended.pages_shared_now_private;
9350 submap_info->pages_dirtied = extended.pages_dirtied;
9351 submap_info->external_pager = extended.external_pager;
9352 submap_info->shadow_depth = extended.shadow_depth;
9353 submap_info->share_mode = extended.share_mode;
9354 submap_info->ref_count = extended.ref_count;
9355 } else {
9356 short_info->external_pager = extended.external_pager;
9357 short_info->shadow_depth = extended.shadow_depth;
9358 short_info->share_mode = extended.share_mode;
9359 short_info->ref_count = extended.ref_count;
9360 }
9361
9362 if (not_in_kdp) {
9363 vm_map_unlock_read(curr_map);
9364 }
9365
9366 return KERN_SUCCESS;
9367 }
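/*
 * A minimal user-space sketch (not kernel code) of the caller side of
 * this routine: walk every region of a task, descending into submaps,
 * via the mach_vm_region_recurse() call.  Assumes the standard Mach
 * user-level headers.
 */
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <stdio.h>

static void
dump_regions(task_t task)
{
	mach_vm_address_t		addr = 0;
	mach_vm_size_t			size;
	natural_t			depth = 0;
	vm_region_submap_info_data_64_t	info;
	mach_msg_type_number_t		count;

	for (;;) {
		count = VM_REGION_SUBMAP_INFO_COUNT_64;
		if (mach_vm_region_recurse(task, &addr, &size, &depth,
					   (vm_region_recurse_info_t)&info,
					   &count) != KERN_SUCCESS)
			break;		/* KERN_INVALID_ADDRESS: no more regions */
		if (info.is_submap) {
			depth++;	/* re-query the same address, one level deeper */
			continue;
		}
		printf("0x%llx-0x%llx depth %u prot 0x%x\n",
		       addr, addr + size, depth, info.protection);
		addr += size;		/* move on to the next region */
	}
}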
9368
9369 /*
9370 * vm_region:
9371 *
9372 * User call to obtain information about a region in
9373 * a task's address map. Several flavors of information are
9374 * supported (basic, basic 64-bit, extended, and top).
9375 *
9376 * XXX The reserved and behavior fields cannot be filled
9377 * in until the vm merge from the IK is completed, and
9378 * vm_reserve is implemented.
9379 */
9380
9381 kern_return_t
9382 vm_map_region(
9383 vm_map_t map,
9384 vm_map_offset_t *address, /* IN/OUT */
9385 vm_map_size_t *size, /* OUT */
9386 vm_region_flavor_t flavor, /* IN */
9387 vm_region_info_t info, /* OUT */
9388 mach_msg_type_number_t *count, /* IN/OUT */
9389 mach_port_t *object_name) /* OUT */
9390 {
9391 vm_map_entry_t tmp_entry;
9392 vm_map_entry_t entry;
9393 vm_map_offset_t start;
9394
9395 if (map == VM_MAP_NULL)
9396 return(KERN_INVALID_ARGUMENT);
9397
9398 switch (flavor) {
9399
9400 case VM_REGION_BASIC_INFO:
9401 /* legacy for old 32-bit objects info */
9402 {
9403 vm_region_basic_info_t basic;
9404
9405 if (*count < VM_REGION_BASIC_INFO_COUNT)
9406 return(KERN_INVALID_ARGUMENT);
9407
9408 basic = (vm_region_basic_info_t) info;
9409 *count = VM_REGION_BASIC_INFO_COUNT;
9410
9411 vm_map_lock_read(map);
9412
9413 start = *address;
9414 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9415 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9416 vm_map_unlock_read(map);
9417 return(KERN_INVALID_ADDRESS);
9418 }
9419 } else {
9420 entry = tmp_entry;
9421 }
9422
9423 start = entry->vme_start;
9424
9425 basic->offset = (uint32_t)entry->offset;
9426 basic->protection = entry->protection;
9427 basic->inheritance = entry->inheritance;
9428 basic->max_protection = entry->max_protection;
9429 basic->behavior = entry->behavior;
9430 basic->user_wired_count = entry->user_wired_count;
9431 basic->reserved = entry->is_sub_map;
9432 *address = start;
9433 *size = (entry->vme_end - start);
9434
9435 if (object_name) *object_name = IP_NULL;
9436 if (entry->is_sub_map) {
9437 basic->shared = FALSE;
9438 } else {
9439 basic->shared = entry->is_shared;
9440 }
9441
9442 vm_map_unlock_read(map);
9443 return(KERN_SUCCESS);
9444 }
9445
9446 case VM_REGION_BASIC_INFO_64:
9447 {
9448 vm_region_basic_info_64_t basic;
9449
9450 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
9451 return(KERN_INVALID_ARGUMENT);
9452
9453 basic = (vm_region_basic_info_64_t) info;
9454 *count = VM_REGION_BASIC_INFO_COUNT_64;
9455
9456 vm_map_lock_read(map);
9457
9458 start = *address;
9459 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9460 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9461 vm_map_unlock_read(map);
9462 return(KERN_INVALID_ADDRESS);
9463 }
9464 } else {
9465 entry = tmp_entry;
9466 }
9467
9468 start = entry->vme_start;
9469
9470 basic->offset = entry->offset;
9471 basic->protection = entry->protection;
9472 basic->inheritance = entry->inheritance;
9473 basic->max_protection = entry->max_protection;
9474 basic->behavior = entry->behavior;
9475 basic->user_wired_count = entry->user_wired_count;
9476 basic->reserved = entry->is_sub_map;
9477 *address = start;
9478 *size = (entry->vme_end - start);
9479
9480 if (object_name) *object_name = IP_NULL;
9481 if (entry->is_sub_map) {
9482 basic->shared = FALSE;
9483 } else {
9484 basic->shared = entry->is_shared;
9485 }
9486
9487 vm_map_unlock_read(map);
9488 return(KERN_SUCCESS);
9489 }
9490 case VM_REGION_EXTENDED_INFO:
9491 {
9492 vm_region_extended_info_t extended;
9493
9494 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
9495 return(KERN_INVALID_ARGUMENT);
9496
9497 extended = (vm_region_extended_info_t) info;
9498 *count = VM_REGION_EXTENDED_INFO_COUNT;
9499
9500 vm_map_lock_read(map);
9501
9502 start = *address;
9503 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9504 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9505 vm_map_unlock_read(map);
9506 return(KERN_INVALID_ADDRESS);
9507 }
9508 } else {
9509 entry = tmp_entry;
9510 }
9511 start = entry->vme_start;
9512
9513 extended->protection = entry->protection;
9514 extended->user_tag = entry->alias;
9515 extended->pages_resident = 0;
9516 extended->pages_swapped_out = 0;
9517 extended->pages_shared_now_private = 0;
9518 extended->pages_dirtied = 0;
9519 extended->external_pager = 0;
9520 extended->shadow_depth = 0;
9521
9522 vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended, TRUE);
9523
9524 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
9525 extended->share_mode = SM_PRIVATE;
9526
9527 if (object_name)
9528 *object_name = IP_NULL;
9529 *address = start;
9530 *size = (entry->vme_end - start);
9531
9532 vm_map_unlock_read(map);
9533 return(KERN_SUCCESS);
9534 }
9535 case VM_REGION_TOP_INFO:
9536 {
9537 vm_region_top_info_t top;
9538
9539 if (*count < VM_REGION_TOP_INFO_COUNT)
9540 return(KERN_INVALID_ARGUMENT);
9541
9542 top = (vm_region_top_info_t) info;
9543 *count = VM_REGION_TOP_INFO_COUNT;
9544
9545 vm_map_lock_read(map);
9546
9547 start = *address;
9548 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9549 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9550 vm_map_unlock_read(map);
9551 return(KERN_INVALID_ADDRESS);
9552 }
9553 } else {
9554 entry = tmp_entry;
9555
9556 }
9557 start = entry->vme_start;
9558
9559 top->private_pages_resident = 0;
9560 top->shared_pages_resident = 0;
9561
9562 vm_map_region_top_walk(entry, top);
9563
9564 if (object_name)
9565 *object_name = IP_NULL;
9566 *address = start;
9567 *size = (entry->vme_end - start);
9568
9569 vm_map_unlock_read(map);
9570 return(KERN_SUCCESS);
9571 }
9572 default:
9573 return(KERN_INVALID_ARGUMENT);
9574 }
9575 }
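/*
 * A minimal user-space sketch (not kernel code) of querying one of the
 * flavors handled above, via the mach_vm_region() call.  Assumes the
 * standard Mach user-level headers.
 */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
region_basic_info(
	task_t				task,
	mach_vm_address_t		*addr,	/* IN/OUT: moved to the region start */
	mach_vm_size_t			*size,	/* OUT */
	vm_region_basic_info_data_64_t	*info)	/* OUT */
{
	mach_msg_type_number_t	count = VM_REGION_BASIC_INFO_COUNT_64;
	mach_port_t		object_name = MACH_PORT_NULL;	/* always null, see above */

	return mach_vm_region(task, addr, size, VM_REGION_BASIC_INFO_64,
			      (vm_region_info_t)info, &count, &object_name);
}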
9576
9577 #define OBJ_RESIDENT_COUNT(obj, entry_size) \
9578 MIN((entry_size), \
9579 ((obj)->all_reusable ? \
9580 (obj)->wired_page_count : \
9581 (obj)->resident_page_count - (obj)->reusable_page_count))
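/*
 * The macro above, written out as a hypothetical helper (not part of
 * this file) to make the accounting explicit: for an "all reusable"
 * object only the wired pages still count as resident; otherwise the
 * reusable pages are subtracted, and the result is clamped to the size
 * of the mapping being examined.
 */
static inline uint32_t
obj_resident_count(vm_object_t obj, uint32_t entry_size)
{
	uint32_t resident;

	if (obj->all_reusable)
		resident = obj->wired_page_count;
	else
		resident = obj->resident_page_count - obj->reusable_page_count;

	return MIN(entry_size, resident);
}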
9582
9583 void
9584 vm_map_region_top_walk(
9585 vm_map_entry_t entry,
9586 vm_region_top_info_t top)
9587 {
9588
9589 if (entry->object.vm_object == 0 || entry->is_sub_map) {
9590 top->share_mode = SM_EMPTY;
9591 top->ref_count = 0;
9592 top->obj_id = 0;
9593 return;
9594 }
9595
9596 {
9597 struct vm_object *obj, *tmp_obj;
9598 int ref_count;
9599 uint32_t entry_size;
9600
9601 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
9602
9603 obj = entry->object.vm_object;
9604
9605 vm_object_lock(obj);
9606
9607 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9608 ref_count--;
9609
9610 assert(obj->reusable_page_count <= obj->resident_page_count);
9611 if (obj->shadow) {
9612 if (ref_count == 1)
9613 top->private_pages_resident =
9614 OBJ_RESIDENT_COUNT(obj, entry_size);
9615 else
9616 top->shared_pages_resident =
9617 OBJ_RESIDENT_COUNT(obj, entry_size);
9618 top->ref_count = ref_count;
9619 top->share_mode = SM_COW;
9620
9621 while ((tmp_obj = obj->shadow)) {
9622 vm_object_lock(tmp_obj);
9623 vm_object_unlock(obj);
9624 obj = tmp_obj;
9625
9626 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9627 ref_count--;
9628
9629 assert(obj->reusable_page_count <= obj->resident_page_count);
9630 top->shared_pages_resident +=
9631 OBJ_RESIDENT_COUNT(obj, entry_size);
9632 top->ref_count += ref_count - 1;
9633 }
9634 } else {
9635 if (entry->superpage_size) {
9636 top->share_mode = SM_LARGE_PAGE;
9637 top->shared_pages_resident = 0;
9638 top->private_pages_resident = entry_size;
9639 } else if (entry->needs_copy) {
9640 top->share_mode = SM_COW;
9641 top->shared_pages_resident =
9642 OBJ_RESIDENT_COUNT(obj, entry_size);
9643 } else {
9644 if (ref_count == 1 ||
9645 (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
9646 top->share_mode = SM_PRIVATE;
9647 top->private_pages_resident =
9648 OBJ_RESIDENT_COUNT(obj,
9649 entry_size);
9650 } else {
9651 top->share_mode = SM_SHARED;
9652 top->shared_pages_resident =
9653 OBJ_RESIDENT_COUNT(obj,
9654 entry_size);
9655 }
9656 }
9657 top->ref_count = ref_count;
9658 }
9659 /* XXX K64: obj_id will be truncated */
9660 top->obj_id = (unsigned int) (uintptr_t)obj;
9661
9662 vm_object_unlock(obj);
9663 }
9664 }
9665
9666 void
9667 vm_map_region_walk(
9668 vm_map_t map,
9669 vm_map_offset_t va,
9670 vm_map_entry_t entry,
9671 vm_object_offset_t offset,
9672 vm_object_size_t range,
9673 vm_region_extended_info_t extended,
9674 boolean_t look_for_pages)
9675 {
9676 register struct vm_object *obj, *tmp_obj;
9677 register vm_map_offset_t last_offset;
9678 register int i;
9679 register int ref_count;
9680 struct vm_object *shadow_object;
9681 int shadow_depth;
9682
9683 if ((entry->object.vm_object == 0) ||
9684 (entry->is_sub_map) ||
9685 (entry->object.vm_object->phys_contiguous &&
9686 !entry->superpage_size)) {
9687 extended->share_mode = SM_EMPTY;
9688 extended->ref_count = 0;
9689 return;
9690 }
9691
9692 if (entry->superpage_size) {
9693 extended->shadow_depth = 0;
9694 extended->share_mode = SM_LARGE_PAGE;
9695 extended->ref_count = 1;
9696 extended->external_pager = 0;
9697 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
9698 extended->shadow_depth = 0;
9699 return;
9700 }
9701
9702 {
9703 obj = entry->object.vm_object;
9704
9705 vm_object_lock(obj);
9706
9707 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9708 ref_count--;
9709
9710 if (look_for_pages) {
9711 for (last_offset = offset + range;
9712 offset < last_offset;
9713 offset += PAGE_SIZE_64, va += PAGE_SIZE)
9714 vm_map_region_look_for_page(map, va, obj,
9715 offset, ref_count,
9716 0, extended);
9717 } else {
9718 shadow_object = obj->shadow;
9719 shadow_depth = 0;
9720
9721 if ( !(obj->pager_trusted) && !(obj->internal))
9722 extended->external_pager = 1;
9723
9724 if (shadow_object != VM_OBJECT_NULL) {
9725 vm_object_lock(shadow_object);
9726 for (;
9727 shadow_object != VM_OBJECT_NULL;
9728 shadow_depth++) {
9729 vm_object_t next_shadow;
9730
9731 if ( !(shadow_object->pager_trusted) &&
9732 !(shadow_object->internal))
9733 extended->external_pager = 1;
9734
9735 next_shadow = shadow_object->shadow;
9736 if (next_shadow) {
9737 vm_object_lock(next_shadow);
9738 }
9739 vm_object_unlock(shadow_object);
9740 shadow_object = next_shadow;
9741 }
9742 }
9743 extended->shadow_depth = shadow_depth;
9744 }
9745
9746 if (extended->shadow_depth || entry->needs_copy)
9747 extended->share_mode = SM_COW;
9748 else {
9749 if (ref_count == 1)
9750 extended->share_mode = SM_PRIVATE;
9751 else {
9752 if (obj->true_share)
9753 extended->share_mode = SM_TRUESHARED;
9754 else
9755 extended->share_mode = SM_SHARED;
9756 }
9757 }
9758 extended->ref_count = ref_count - extended->shadow_depth;
9759
9760 for (i = 0; i < extended->shadow_depth; i++) {
9761 if ((tmp_obj = obj->shadow) == 0)
9762 break;
9763 vm_object_lock(tmp_obj);
9764 vm_object_unlock(obj);
9765
9766 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
9767 ref_count--;
9768
9769 extended->ref_count += ref_count;
9770 obj = tmp_obj;
9771 }
9772 vm_object_unlock(obj);
9773
9774 if (extended->share_mode == SM_SHARED) {
9775 register vm_map_entry_t cur;
9776 register vm_map_entry_t last;
9777 int my_refs;
9778
9779 obj = entry->object.vm_object;
9780 last = vm_map_to_entry(map);
9781 my_refs = 0;
9782
9783 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9784 ref_count--;
9785 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
9786 my_refs += vm_map_region_count_obj_refs(cur, obj);
9787
9788 if (my_refs == ref_count)
9789 extended->share_mode = SM_PRIVATE_ALIASED;
9790 else if (my_refs > 1)
9791 extended->share_mode = SM_SHARED_ALIASED;
9792 }
9793 }
9794 }
9795
9796
9797 /* object is locked on entry and locked on return */
9798
9799
9800 static void
9801 vm_map_region_look_for_page(
9802 __unused vm_map_t map,
9803 __unused vm_map_offset_t va,
9804 vm_object_t object,
9805 vm_object_offset_t offset,
9806 int max_refcnt,
9807 int depth,
9808 vm_region_extended_info_t extended)
9809 {
9810 register vm_page_t p;
9811 register vm_object_t shadow;
9812 register int ref_count;
9813 vm_object_t caller_object;
9814 #if MACH_PAGEMAP
9815 kern_return_t kr;
9816 #endif
9817 shadow = object->shadow;
9818 caller_object = object;
9819
9820
9821 while (TRUE) {
9822
9823 if ( !(object->pager_trusted) && !(object->internal))
9824 extended->external_pager = 1;
9825
9826 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
9827 if (shadow && (max_refcnt == 1))
9828 extended->pages_shared_now_private++;
9829
9830 if (!p->fictitious &&
9831 (p->dirty || pmap_is_modified(p->phys_page)))
9832 extended->pages_dirtied++;
9833
9834 extended->pages_resident++;
9835
9836 if(object != caller_object)
9837 vm_object_unlock(object);
9838
9839 return;
9840 }
9841 #if MACH_PAGEMAP
9842 if (object->existence_map) {
9843 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) {
9844
9845 extended->pages_swapped_out++;
9846
9847 if(object != caller_object)
9848 vm_object_unlock(object);
9849
9850 return;
9851 }
9852 } else if (object->internal &&
9853 object->alive &&
9854 !object->terminating &&
9855 object->pager_ready) {
9856
9857 memory_object_t pager;
9858
9859 vm_object_paging_begin(object);
9860 pager = object->pager;
9861 vm_object_unlock(object);
9862
9863 kr = memory_object_data_request(
9864 pager,
9865 offset + object->paging_offset,
9866 0, /* just poke the pager */
9867 VM_PROT_READ,
9868 NULL);
9869
9870 vm_object_lock(object);
9871 vm_object_paging_end(object);
9872
9873 if (kr == KERN_SUCCESS) {
9874 /* the pager has that page */
9875 extended->pages_swapped_out++;
9876 if (object != caller_object)
9877 vm_object_unlock(object);
9878 return;
9879 }
9880 }
9881 #endif /* MACH_PAGEMAP */
9882
9883 if (shadow) {
9884 vm_object_lock(shadow);
9885
9886 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
9887 ref_count--;
9888
9889 if (++depth > extended->shadow_depth)
9890 extended->shadow_depth = depth;
9891
9892 if (ref_count > max_refcnt)
9893 max_refcnt = ref_count;
9894
9895 if(object != caller_object)
9896 vm_object_unlock(object);
9897
9898 offset = offset + object->vo_shadow_offset;
9899 object = shadow;
9900 shadow = object->shadow;
9901 continue;
9902 }
9903 if(object != caller_object)
9904 vm_object_unlock(object);
9905 break;
9906 }
9907 }
9908
9909 static int
9910 vm_map_region_count_obj_refs(
9911 vm_map_entry_t entry,
9912 vm_object_t object)
9913 {
9914 register int ref_count;
9915 register vm_object_t chk_obj;
9916 register vm_object_t tmp_obj;
9917
9918 if (entry->object.vm_object == 0)
9919 return(0);
9920
9921 if (entry->is_sub_map)
9922 return(0);
9923 else {
9924 ref_count = 0;
9925
9926 chk_obj = entry->object.vm_object;
9927 vm_object_lock(chk_obj);
9928
9929 while (chk_obj) {
9930 if (chk_obj == object)
9931 ref_count++;
9932 tmp_obj = chk_obj->shadow;
9933 if (tmp_obj)
9934 vm_object_lock(tmp_obj);
9935 vm_object_unlock(chk_obj);
9936
9937 chk_obj = tmp_obj;
9938 }
9939 }
9940 return(ref_count);
9941 }
9942
9943
9944 /*
9945 * Routine: vm_map_simplify
9946 *
9947 * Description:
9948 * Attempt to simplify the map representation in
9949 * the vicinity of the given starting address.
9950 * Note:
9951 * This routine is intended primarily to keep the
9952 * kernel maps more compact -- they generally don't
9953 * benefit from the "expand a map entry" technology
9954 * at allocation time because the adjacent entry
9955 * is often wired down.
9956 */
9957 void
9958 vm_map_simplify_entry(
9959 vm_map_t map,
9960 vm_map_entry_t this_entry)
9961 {
9962 vm_map_entry_t prev_entry;
9963
9964 counter(c_vm_map_simplify_entry_called++);
9965
9966 prev_entry = this_entry->vme_prev;
9967
9968 if ((this_entry != vm_map_to_entry(map)) &&
9969 (prev_entry != vm_map_to_entry(map)) &&
9970
9971 (prev_entry->vme_end == this_entry->vme_start) &&
9972
9973 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
9974
9975 (prev_entry->object.vm_object == this_entry->object.vm_object) &&
9976 ((prev_entry->offset + (prev_entry->vme_end -
9977 prev_entry->vme_start))
9978 == this_entry->offset) &&
9979
9980 (prev_entry->inheritance == this_entry->inheritance) &&
9981 (prev_entry->protection == this_entry->protection) &&
9982 (prev_entry->max_protection == this_entry->max_protection) &&
9983 (prev_entry->behavior == this_entry->behavior) &&
9984 (prev_entry->alias == this_entry->alias) &&
9985 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
9986 (prev_entry->no_cache == this_entry->no_cache) &&
9987 (prev_entry->wired_count == this_entry->wired_count) &&
9988 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
9989
9990 (prev_entry->needs_copy == this_entry->needs_copy) &&
9991 (prev_entry->permanent == this_entry->permanent) &&
9992
9993 (prev_entry->use_pmap == FALSE) &&
9994 (this_entry->use_pmap == FALSE) &&
9995 (prev_entry->in_transition == FALSE) &&
9996 (this_entry->in_transition == FALSE) &&
9997 (prev_entry->needs_wakeup == FALSE) &&
9998 (this_entry->needs_wakeup == FALSE) &&
9999 (prev_entry->is_shared == FALSE) &&
10000 (this_entry->is_shared == FALSE)
10001 ) {
10002 _vm_map_store_entry_unlink(&map->hdr, prev_entry);
10003 this_entry->vme_start = prev_entry->vme_start;
10004 this_entry->offset = prev_entry->offset;
10005 if (prev_entry->is_sub_map) {
10006 vm_map_deallocate(prev_entry->object.sub_map);
10007 } else {
10008 vm_object_deallocate(prev_entry->object.vm_object);
10009 }
10010 vm_map_entry_dispose(map, prev_entry);
10011 SAVE_HINT_MAP_WRITE(map, this_entry);
10012 counter(c_vm_map_simplified++);
10013 }
10014 }
10015
10016 void
10017 vm_map_simplify(
10018 vm_map_t map,
10019 vm_map_offset_t start)
10020 {
10021 vm_map_entry_t this_entry;
10022
10023 vm_map_lock(map);
10024 if (vm_map_lookup_entry(map, start, &this_entry)) {
10025 vm_map_simplify_entry(map, this_entry);
10026 vm_map_simplify_entry(map, this_entry->vme_next);
10027 }
10028 counter(c_vm_map_simplify_called++);
10029 vm_map_unlock(map);
10030 }
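/*
 * Worked example (hypothetical values) of the coalescing test in
 * vm_map_simplify_entry(): two entries merge only when they are
 * virtually contiguous and map contiguous offsets of the same object
 * with identical attributes.
 *
 *	prev_entry: [0x1000, 0x3000)  object A, offset 0x0000
 *	this_entry: [0x3000, 0x5000)  object A, offset 0x2000
 *
 * prev_entry->vme_end == this_entry->vme_start (0x3000), and
 * prev_entry->offset + (prev_entry->vme_end - prev_entry->vme_start)
 * == 0x0000 + 0x2000 == this_entry->offset, so with matching
 * protection, inheritance, wiring, etc., the two collapse into one
 * entry [0x1000, 0x5000) at offset 0x0000 and prev_entry is disposed.
 */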
10031
10032 static void
10033 vm_map_simplify_range(
10034 vm_map_t map,
10035 vm_map_offset_t start,
10036 vm_map_offset_t end)
10037 {
10038 vm_map_entry_t entry;
10039
10040 /*
10041 * The map should be locked (for "write") by the caller.
10042 */
10043
10044 if (start >= end) {
10045 /* invalid address range */
10046 return;
10047 }
10048
10049 start = vm_map_trunc_page(start);
10050 end = vm_map_round_page(end);
10051
10052 if (!vm_map_lookup_entry(map, start, &entry)) {
10053 /* "start" is not mapped and "entry" ends before "start" */
10054 if (entry == vm_map_to_entry(map)) {
10055 /* start with first entry in the map */
10056 entry = vm_map_first_entry(map);
10057 } else {
10058 /* start with next entry */
10059 entry = entry->vme_next;
10060 }
10061 }
10062
10063 while (entry != vm_map_to_entry(map) &&
10064 entry->vme_start <= end) {
10065 /* try and coalesce "entry" with its previous entry */
10066 vm_map_simplify_entry(map, entry);
10067 entry = entry->vme_next;
10068 }
10069 }
10070
10071
10072 /*
10073 * Routine: vm_map_machine_attribute
10074 * Purpose:
10075 * Provide machine-specific attributes to mappings,
10076 * such as cacheability, etc., for machines that provide
10077 * them. NUMA architectures and machines with big/strange
10078 * caches will use this.
10079 * Note:
10080 * Responsibilities for locking and checking are handled here;
10081 * everything else is handled in the pmap module. If any non-volatile
10082 * information must be kept, the pmap module should handle
10083 * it itself. [This assumes that attributes do not
10084 * need to be inherited, which seems ok to me]
10085 */
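/*
 * Hedged usage sketch (hypothetical caller and variable names): a
 * driver that has just modified a user mapping by DMA might request
 * a cache flush for that range, for example:
 *
 *	vm_machine_attribute_val_t val = MATTR_VAL_CACHE_FLUSH;
 *	kr = vm_map_machine_attribute(map, start, start + size,
 *				      MATTR_CACHE, &val);
 *
 * Only MATTR_CACHE takes the page-by-page path below; any other
 * attribute is handed straight to pmap_attribute().
 */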
10086 kern_return_t
10087 vm_map_machine_attribute(
10088 vm_map_t map,
10089 vm_map_offset_t start,
10090 vm_map_offset_t end,
10091 vm_machine_attribute_t attribute,
10092 vm_machine_attribute_val_t* value) /* IN/OUT */
10093 {
10094 kern_return_t ret;
10095 vm_map_size_t sync_size;
10096 vm_map_entry_t entry;
10097
10098 if (start < vm_map_min(map) || end > vm_map_max(map))
10099 return KERN_INVALID_ADDRESS;
10100
10101 /* Figure out how much memory we need to flush (in page increments) */
10102 sync_size = end - start;
10103
10104 vm_map_lock(map);
10105
10106 if (attribute != MATTR_CACHE) {
10107 /* If we don't have to find physical addresses, we */
10108 /* don't have to do an explicit traversal here. */
10109 ret = pmap_attribute(map->pmap, start, end-start,
10110 attribute, value);
10111 vm_map_unlock(map);
10112 return ret;
10113 }
10114
10115 ret = KERN_SUCCESS; /* Assume it all worked */
10116
10117 while(sync_size) {
10118 if (vm_map_lookup_entry(map, start, &entry)) {
10119 vm_map_size_t sub_size;
10120 if((entry->vme_end - start) > sync_size) {
10121 sub_size = sync_size;
10122 sync_size = 0;
10123 } else {
10124 sub_size = entry->vme_end - start;
10125 sync_size -= sub_size;
10126 }
10127 if(entry->is_sub_map) {
10128 vm_map_offset_t sub_start;
10129 vm_map_offset_t sub_end;
10130
10131 sub_start = (start - entry->vme_start)
10132 + entry->offset;
10133 sub_end = sub_start + sub_size;
10134 vm_map_machine_attribute(
10135 entry->object.sub_map,
10136 sub_start,
10137 sub_end,
10138 attribute, value);
10139 } else {
10140 if(entry->object.vm_object) {
10141 vm_page_t m;
10142 vm_object_t object;
10143 vm_object_t base_object;
10144 vm_object_t last_object;
10145 vm_object_offset_t offset;
10146 vm_object_offset_t base_offset;
10147 vm_map_size_t range;
10148 range = sub_size;
10149 offset = (start - entry->vme_start)
10150 + entry->offset;
10151 base_offset = offset;
10152 object = entry->object.vm_object;
10153 base_object = object;
10154 last_object = NULL;
10155
10156 vm_object_lock(object);
10157
10158 while (range) {
10159 m = vm_page_lookup(
10160 object, offset);
10161
10162 if (m && !m->fictitious) {
10163 ret =
10164 pmap_attribute_cache_sync(
10165 m->phys_page,
10166 PAGE_SIZE,
10167 attribute, value);
10168
10169 } else if (object->shadow) {
10170 offset = offset + object->vo_shadow_offset;
10171 last_object = object;
10172 object = object->shadow;
10173 vm_object_lock(last_object->shadow);
10174 vm_object_unlock(last_object);
10175 continue;
10176 }
10177 range -= PAGE_SIZE;
10178
10179 if (base_object != object) {
10180 vm_object_unlock(object);
10181 vm_object_lock(base_object);
10182 object = base_object;
10183 }
10184 /* Bump to the next page */
10185 base_offset += PAGE_SIZE;
10186 offset = base_offset;
10187 }
10188 vm_object_unlock(object);
10189 }
10190 }
10191 start += sub_size;
10192 } else {
10193 vm_map_unlock(map);
10194 return KERN_FAILURE;
10195 }
10196
10197 }
10198
10199 vm_map_unlock(map);
10200
10201 return ret;
10202 }
10203
10204 /*
10205 * vm_map_behavior_set:
10206 *
10207 * Sets the paging reference behavior of the specified address
10208 * range in the target map. Paging reference behavior affects
10209 * how pagein operations resulting from faults on the map will be
10210 * clustered.
10211 */
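/*
 * For orientation only (hedged, not authoritative): the BSD madvise()
 * path typically translates its advice into one of these behaviors
 * before reaching this routine, roughly:
 *
 *	MADV_SEQUENTIAL  ->  VM_BEHAVIOR_SEQUENTIAL	(persistent state)
 *	MADV_WILLNEED    ->  VM_BEHAVIOR_WILLNEED	(immediate action)
 *	MADV_FREE        ->  VM_BEHAVIOR_FREE		(immediate action)
 */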
10212 kern_return_t
10213 vm_map_behavior_set(
10214 vm_map_t map,
10215 vm_map_offset_t start,
10216 vm_map_offset_t end,
10217 vm_behavior_t new_behavior)
10218 {
10219 register vm_map_entry_t entry;
10220 vm_map_entry_t temp_entry;
10221
10222 XPR(XPR_VM_MAP,
10223 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
10224 map, start, end, new_behavior, 0);
10225
10226 if (start > end ||
10227 start < vm_map_min(map) ||
10228 end > vm_map_max(map)) {
10229 return KERN_NO_SPACE;
10230 }
10231
10232 switch (new_behavior) {
10233
10234 /*
10235 * This first block of behaviors all set a persistent state on the specified
10236 * memory range. All we have to do here is to record the desired behavior
10237 * in the vm_map_entry_t's.
10238 */
10239
10240 case VM_BEHAVIOR_DEFAULT:
10241 case VM_BEHAVIOR_RANDOM:
10242 case VM_BEHAVIOR_SEQUENTIAL:
10243 case VM_BEHAVIOR_RSEQNTL:
10244 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
10245 vm_map_lock(map);
10246
10247 /*
10248 * The entire address range must be valid for the map.
10249 * Note that vm_map_range_check() does a
10250 * vm_map_lookup_entry() internally and returns the
10251 * entry containing the start of the address range if
10252 * the entire range is valid.
10253 */
10254 if (vm_map_range_check(map, start, end, &temp_entry)) {
10255 entry = temp_entry;
10256 vm_map_clip_start(map, entry, start);
10257 }
10258 else {
10259 vm_map_unlock(map);
10260 return(KERN_INVALID_ADDRESS);
10261 }
10262
10263 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
10264 vm_map_clip_end(map, entry, end);
10265 assert(!entry->use_pmap);
10266
10267 if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
10268 entry->zero_wired_pages = TRUE;
10269 } else {
10270 entry->behavior = new_behavior;
10271 }
10272 entry = entry->vme_next;
10273 }
10274
10275 vm_map_unlock(map);
10276 break;
10277
10278 /*
10279 * The rest of these are different from the above in that they cause
10280 * an immediate action to take place as opposed to setting a behavior that
10281 * affects future actions.
10282 */
10283
10284 case VM_BEHAVIOR_WILLNEED:
10285 return vm_map_willneed(map, start, end);
10286
10287 case VM_BEHAVIOR_DONTNEED:
10288 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
10289
10290 case VM_BEHAVIOR_FREE:
10291 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
10292
10293 case VM_BEHAVIOR_REUSABLE:
10294 return vm_map_reusable_pages(map, start, end);
10295
10296 case VM_BEHAVIOR_REUSE:
10297 return vm_map_reuse_pages(map, start, end);
10298
10299 case VM_BEHAVIOR_CAN_REUSE:
10300 return vm_map_can_reuse(map, start, end);
10301
10302 default:
10303 return(KERN_INVALID_ARGUMENT);
10304 }
10305
10306 return(KERN_SUCCESS);
10307 }
10308
10309
10310 /*
10311 * Internals for madvise(MADV_WILLNEED) system call.
10312 *
10313 * The present implementation is to do a read-ahead if the mapping corresponds
10314 * to a mapped regular file. If it's an anonymous mapping, then we do nothing
10315 * and basically ignore the "advice" (which we are always free to do).
10316 */
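/*
 * Hedged userspace-side sketch (hypothetical descriptor "fd" and
 * length "len") of the call that ends up here:
 *
 *	void *p = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
 *	madvise(p, len, MADV_WILLNEED);
 *
 * which asks for asynchronous read-ahead of the file-backed range.
 * For an anonymous mapping the advice is accepted but has no effect.
 */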
10317
10318
10319 static kern_return_t
10320 vm_map_willneed(
10321 vm_map_t map,
10322 vm_map_offset_t start,
10323 vm_map_offset_t end
10324 )
10325 {
10326 vm_map_entry_t entry;
10327 vm_object_t object;
10328 memory_object_t pager;
10329 struct vm_object_fault_info fault_info;
10330 kern_return_t kr;
10331 vm_object_size_t len;
10332 vm_object_offset_t offset;
10333
10334 /*
10335 * Fill in static values in fault_info. Several fields get ignored by the code
10336 * we call, but we'll fill them in anyway since uninitialized fields are bad
10337 * when it comes to future backwards compatibility.
10338 */
10339
10340 fault_info.interruptible = THREAD_UNINT; /* ignored value */
10341 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
10342 fault_info.no_cache = FALSE; /* ignored value */
10343 fault_info.stealth = TRUE;
10344 fault_info.io_sync = FALSE;
10345 fault_info.cs_bypass = FALSE;
10346 fault_info.mark_zf_absent = FALSE;
10347
10348 /*
10349 * The MADV_WILLNEED operation doesn't require any changes to the
10350 * vm_map_entry_t's, so the read lock is sufficient.
10351 */
10352
10353 vm_map_lock_read(map);
10354
10355 /*
10356 * The madvise semantics require that the address range be fully
10357 * allocated with no holes. Otherwise, we're required to return
10358 * an error.
10359 */
10360
10361 if (! vm_map_range_check(map, start, end, &entry)) {
10362 vm_map_unlock_read(map);
10363 return KERN_INVALID_ADDRESS;
10364 }
10365
10366 /*
10367 * Examine each vm_map_entry_t in the range.
10368 */
10369 for (; entry != vm_map_to_entry(map) && start < end; ) {
10370
10371 /*
10372 * The first time through, the start address could be anywhere
10373 * within the vm_map_entry we found. So adjust the offset to
10374 * correspond. After that, the offset will always be zero to
10375 * correspond to the beginning of the current vm_map_entry.
10376 */
10377 offset = (start - entry->vme_start) + entry->offset;
10378
10379 /*
10380 * Set the length so we don't go beyond the end of the
10381 * map_entry or beyond the end of the range we were given.
10382 * This range could also span multiple map entries, all of which
10383 * map different files, so make sure we only do the right amount
10384 * of I/O for each object. Note that it's possible for there
10385 * to be multiple map entries all referring to the same object
10386 * but with different page permissions, but it's not worth
10387 * trying to optimize that case.
10388 */
10389 len = MIN(entry->vme_end - start, end - start);
10390
10391 if ((vm_size_t) len != len) {
10392 /* 32-bit overflow */
10393 len = (vm_size_t) (0 - PAGE_SIZE);
10394 }
10395 fault_info.cluster_size = (vm_size_t) len;
10396 fault_info.lo_offset = offset;
10397 fault_info.hi_offset = offset + len;
10398 fault_info.user_tag = entry->alias;
10399
10400 /*
10401 * If there's no read permission to this mapping, then just
10402 * skip it.
10403 */
10404 if ((entry->protection & VM_PROT_READ) == 0) {
10405 entry = entry->vme_next;
10406 start = entry->vme_start;
10407 continue;
10408 }
10409
10410 /*
10411 * Find the file object backing this map entry. If there is
10412 * none, then we simply ignore the "will need" advice for this
10413 * entry and go on to the next one.
10414 */
10415 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
10416 entry = entry->vme_next;
10417 start = entry->vme_start;
10418 continue;
10419 }
10420
10421 /*
10422 * The data_request() could take a long time, so let's
10423 * release the map lock to avoid blocking other threads.
10424 */
10425 vm_map_unlock_read(map);
10426
10427 vm_object_paging_begin(object);
10428 pager = object->pager;
10429 vm_object_unlock(object);
10430
10431 /*
10432 * Get the data from the object asynchronously.
10433 *
10434 * Note that memory_object_data_request() places limits on the
10435 * amount of I/O it will do. Regardless of the len we
10436 * specified, it won't do more than MAX_UPL_TRANSFER and it
10437 * silently truncates the len to that size. This isn't
10438 * necessarily bad since madvise shouldn't really be used to
10439 * page in unlimited amounts of data. Other Unix variants
10440 * limit the willneed case as well. If this turns out to be an
10441 * issue for developers, then we can always adjust the policy
10442 * here and still be backwards compatible since this is all
10443 * just "advice".
10444 */
10445 kr = memory_object_data_request(
10446 pager,
10447 offset + object->paging_offset,
10448 0, /* ignored */
10449 VM_PROT_READ,
10450 (memory_object_fault_info_t)&fault_info);
10451
10452 vm_object_lock(object);
10453 vm_object_paging_end(object);
10454 vm_object_unlock(object);
10455
10456 /*
10457 * If we couldn't do the I/O for some reason, just give up on
10458 * the madvise. We still return success to the user since
10459 * madvise isn't supposed to fail when the advice can't be
10460 * taken.
10461 */
10462 if (kr != KERN_SUCCESS) {
10463 return KERN_SUCCESS;
10464 }
10465
10466 start += len;
10467 if (start >= end) {
10468 /* done */
10469 return KERN_SUCCESS;
10470 }
10471
10472 /* look up next entry */
10473 vm_map_lock_read(map);
10474 if (! vm_map_lookup_entry(map, start, &entry)) {
10475 /*
10476 * There's a new hole in the address range.
10477 */
10478 vm_map_unlock_read(map);
10479 return KERN_INVALID_ADDRESS;
10480 }
10481 }
10482
10483 vm_map_unlock_read(map);
10484 return KERN_SUCCESS;
10485 }
10486
10487 static boolean_t
10488 vm_map_entry_is_reusable(
10489 vm_map_entry_t entry)
10490 {
10491 vm_object_t object;
10492
10493 if (entry->is_shared ||
10494 entry->is_sub_map ||
10495 entry->in_transition ||
10496 entry->protection != VM_PROT_DEFAULT ||
10497 entry->max_protection != VM_PROT_ALL ||
10498 entry->inheritance != VM_INHERIT_DEFAULT ||
10499 entry->no_cache ||
10500 entry->permanent ||
10501 entry->superpage_size != 0 ||
10502 entry->zero_wired_pages ||
10503 entry->wired_count != 0 ||
10504 entry->user_wired_count != 0) {
10505 return FALSE;
10506 }
10507
10508 object = entry->object.vm_object;
10509 if (object == VM_OBJECT_NULL) {
10510 return TRUE;
10511 }
10512 if (object->ref_count == 1 &&
10513 object->wired_page_count == 0 &&
10514 object->copy == VM_OBJECT_NULL &&
10515 object->shadow == VM_OBJECT_NULL &&
10516 object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
10517 object->internal &&
10518 !object->true_share &&
10519 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
10520 !object->code_signed) {
10521 return TRUE;
10522 }
10523 return FALSE;
10524
10525
10526 }
10527
10528 static kern_return_t
10529 vm_map_reuse_pages(
10530 vm_map_t map,
10531 vm_map_offset_t start,
10532 vm_map_offset_t end)
10533 {
10534 vm_map_entry_t entry;
10535 vm_object_t object;
10536 vm_object_offset_t start_offset, end_offset;
10537
10538 /*
10539 * The MADV_REUSE operation doesn't require any changes to the
10540 * vm_map_entry_t's, so the read lock is sufficient.
10541 */
10542
10543 vm_map_lock_read(map);
10544
10545 /*
10546 * The madvise semantics require that the address range be fully
10547 * allocated with no holes. Otherwise, we're required to return
10548 * an error.
10549 */
10550
10551 if (!vm_map_range_check(map, start, end, &entry)) {
10552 vm_map_unlock_read(map);
10553 vm_page_stats_reusable.reuse_pages_failure++;
10554 return KERN_INVALID_ADDRESS;
10555 }
10556
10557 /*
10558 * Examine each vm_map_entry_t in the range.
10559 */
10560 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10561 entry = entry->vme_next) {
10562 /*
10563 * Sanity check on the VM map entry.
10564 */
10565 if (! vm_map_entry_is_reusable(entry)) {
10566 vm_map_unlock_read(map);
10567 vm_page_stats_reusable.reuse_pages_failure++;
10568 return KERN_INVALID_ADDRESS;
10569 }
10570
10571 /*
10572 * The first time through, the start address could be anywhere
10573 * within the vm_map_entry we found. So adjust the offset to
10574 * correspond.
10575 */
10576 if (entry->vme_start < start) {
10577 start_offset = start - entry->vme_start;
10578 } else {
10579 start_offset = 0;
10580 }
10581 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
10582 start_offset += entry->offset;
10583 end_offset += entry->offset;
10584
10585 object = entry->object.vm_object;
10586 if (object != VM_OBJECT_NULL) {
10587 vm_object_lock(object);
10588 vm_object_reuse_pages(object, start_offset, end_offset,
10589 TRUE);
10590 vm_object_unlock(object);
10591 }
10592
10593 if (entry->alias == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
10594 /*
10595 * XXX
10596 * We do not hold the VM map exclusively here.
10597 * The "alias" field is not that critical, so it's
10598 * safe to update it here, as long as it is the only
10599 * one that can be modified while holding the VM map
10600 * "shared".
10601 */
10602 entry->alias = VM_MEMORY_MALLOC_LARGE_REUSED;
10603 }
10604 }
10605
10606 vm_map_unlock_read(map);
10607 vm_page_stats_reusable.reuse_pages_success++;
10608 return KERN_SUCCESS;
10609 }
10610
10611
10612 static kern_return_t
10613 vm_map_reusable_pages(
10614 vm_map_t map,
10615 vm_map_offset_t start,
10616 vm_map_offset_t end)
10617 {
10618 vm_map_entry_t entry;
10619 vm_object_t object;
10620 vm_object_offset_t start_offset, end_offset;
10621
10622 /*
10623 * The MADV_REUSABLE operation doesn't require any changes to the
10624 * vm_map_entry_t's, so the read lock is sufficient.
10625 */
10626
10627 vm_map_lock_read(map);
10628
10629 /*
10630 * The madvise semantics require that the address range be fully
10631 * allocated with no holes. Otherwise, we're required to return
10632 * an error.
10633 */
10634
10635 if (!vm_map_range_check(map, start, end, &entry)) {
10636 vm_map_unlock_read(map);
10637 vm_page_stats_reusable.reusable_pages_failure++;
10638 return KERN_INVALID_ADDRESS;
10639 }
10640
10641 /*
10642 * Examine each vm_map_entry_t in the range.
10643 */
10644 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10645 entry = entry->vme_next) {
10646 int kill_pages = 0;
10647
10648 /*
10649 * Sanity check on the VM map entry.
10650 */
10651 if (! vm_map_entry_is_reusable(entry)) {
10652 vm_map_unlock_read(map);
10653 vm_page_stats_reusable.reusable_pages_failure++;
10654 return KERN_INVALID_ADDRESS;
10655 }
10656
10657 /*
10658 * The first time through, the start address could be anywhere
10659 * within the vm_map_entry we found. So adjust the offset to
10660 * correspond.
10661 */
10662 if (entry->vme_start < start) {
10663 start_offset = start - entry->vme_start;
10664 } else {
10665 start_offset = 0;
10666 }
10667 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
10668 start_offset += entry->offset;
10669 end_offset += entry->offset;
10670
10671 object = entry->object.vm_object;
10672 if (object == VM_OBJECT_NULL)
10673 continue;
10674
10675
10676 vm_object_lock(object);
10677 if (object->ref_count == 1 && !object->shadow)
10678 kill_pages = 1;
10679 else
10680 kill_pages = -1;
10681 if (kill_pages != -1) {
10682 vm_object_deactivate_pages(object,
10683 start_offset,
10684 end_offset - start_offset,
10685 kill_pages,
10686 TRUE /*reusable_pages*/);
10687 } else {
10688 vm_page_stats_reusable.reusable_pages_shared++;
10689 }
10690 vm_object_unlock(object);
10691
10692 if (entry->alias == VM_MEMORY_MALLOC_LARGE ||
10693 entry->alias == VM_MEMORY_MALLOC_LARGE_REUSED) {
10694 /*
10695 * XXX
10696 * We do not hold the VM map exclusively here.
10697 * The "alias" field is not that critical, so it's
10698 * safe to update it here, as long as it is the only
10699 * one that can be modified while holding the VM map
10700 * "shared".
10701 */
10702 entry->alias = VM_MEMORY_MALLOC_LARGE_REUSABLE;
10703 }
10704 }
10705
10706 vm_map_unlock_read(map);
10707 vm_page_stats_reusable.reusable_pages_success++;
10708 return KERN_SUCCESS;
10709 }
10710
10711
10712 static kern_return_t
10713 vm_map_can_reuse(
10714 vm_map_t map,
10715 vm_map_offset_t start,
10716 vm_map_offset_t end)
10717 {
10718 vm_map_entry_t entry;
10719
10720 /*
10721 * The MADV_REUSABLE operation doesn't require any changes to the
10722 * vm_map_entry_t's, so the read lock is sufficient.
10723 */
10724
10725 vm_map_lock_read(map);
10726
10727 /*
10728 * The madvise semantics require that the address range be fully
10729 * allocated with no holes. Otherwise, we're required to return
10730 * an error.
10731 */
10732
10733 if (!vm_map_range_check(map, start, end, &entry)) {
10734 vm_map_unlock_read(map);
10735 vm_page_stats_reusable.can_reuse_failure++;
10736 return KERN_INVALID_ADDRESS;
10737 }
10738
10739 /*
10740 * Examine each vm_map_entry_t in the range.
10741 */
10742 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10743 entry = entry->vme_next) {
10744 /*
10745 * Sanity check on the VM map entry.
10746 */
10747 if (! vm_map_entry_is_reusable(entry)) {
10748 vm_map_unlock_read(map);
10749 vm_page_stats_reusable.can_reuse_failure++;
10750 return KERN_INVALID_ADDRESS;
10751 }
10752 }
10753
10754 vm_map_unlock_read(map);
10755 vm_page_stats_reusable.can_reuse_success++;
10756 return KERN_SUCCESS;
10757 }
10758
10759
10760
10761 #include <mach_kdb.h>
10762 #if MACH_KDB
10763 #include <ddb/db_output.h>
10764 #include <vm/vm_print.h>
10765
10766 #define printf db_printf
10767
10768 /*
10769 * Forward declarations for internal functions.
10770 */
10771 extern void vm_map_links_print(
10772 struct vm_map_links *links);
10773
10774 extern void vm_map_header_print(
10775 struct vm_map_header *header);
10776
10777 extern void vm_map_entry_print(
10778 vm_map_entry_t entry);
10779
10780 extern void vm_follow_entry(
10781 vm_map_entry_t entry);
10782
10783 extern void vm_follow_map(
10784 vm_map_t map);
10785
10786 /*
10787 * vm_map_links_print: [ debug ]
10788 */
10789 void
10790 vm_map_links_print(
10791 struct vm_map_links *links)
10792 {
10793 iprintf("prev = %08X next = %08X start = %016llX end = %016llX\n",
10794 links->prev,
10795 links->next,
10796 (unsigned long long)links->start,
10797 (unsigned long long)links->end);
10798 }
10799
10800 /*
10801 * vm_map_header_print: [ debug ]
10802 */
10803 void
10804 vm_map_header_print(
10805 struct vm_map_header *header)
10806 {
10807 vm_map_links_print(&header->links);
10808 iprintf("nentries = %08X, %sentries_pageable\n",
10809 header->nentries,
10810 (header->entries_pageable ? "" : "!"));
10811 }
10812
10813 /*
10814 * vm_follow_entry: [ debug ]
10815 */
10816 void
10817 vm_follow_entry(
10818 vm_map_entry_t entry)
10819 {
10820 int shadows;
10821
10822 iprintf("map entry %08X\n", entry);
10823
10824 db_indent += 2;
10825
10826 shadows = vm_follow_object(entry->object.vm_object);
10827 iprintf("Total objects : %d\n",shadows);
10828
10829 db_indent -= 2;
10830 }
10831
10832 /*
10833 * vm_map_entry_print: [ debug ]
10834 */
10835 void
10836 vm_map_entry_print(
10837 register vm_map_entry_t entry)
10838 {
10839 static const char *inheritance_name[4] =
10840 { "share", "copy", "none", "?"};
10841 static const char *behavior_name[4] =
10842 { "dflt", "rand", "seqtl", "rseqntl" };
10843
10844 iprintf("map entry %08X - prev = %08X next = %08X\n", entry, entry->vme_prev, entry->vme_next);
10845
10846 db_indent += 2;
10847
10848 vm_map_links_print(&entry->links);
10849
10850 iprintf("start = %016llX end = %016llX - prot=%x/%x/%s\n",
10851 (unsigned long long)entry->vme_start,
10852 (unsigned long long)entry->vme_end,
10853 entry->protection,
10854 entry->max_protection,
10855 inheritance_name[(entry->inheritance & 0x3)]);
10856
10857 iprintf("behavior = %s, wired_count = %d, user_wired_count = %d\n",
10858 behavior_name[(entry->behavior & 0x3)],
10859 entry->wired_count,
10860 entry->user_wired_count);
10861 iprintf("%sin_transition, %sneeds_wakeup\n",
10862 (entry->in_transition ? "" : "!"),
10863 (entry->needs_wakeup ? "" : "!"));
10864
10865 if (entry->is_sub_map) {
10866 iprintf("submap = %08X - offset = %016llX\n",
10867 entry->object.sub_map,
10868 (unsigned long long)entry->offset);
10869 } else {
10870 iprintf("object = %08X offset = %016llX - ",
10871 entry->object.vm_object,
10872 (unsigned long long)entry->offset);
10873 printf("%sis_shared, %sneeds_copy\n",
10874 (entry->is_shared ? "" : "!"),
10875 (entry->needs_copy ? "" : "!"));
10876 }
10877
10878 db_indent -= 2;
10879 }
10880
10881 /*
10882 * vm_follow_map: [ debug ]
10883 */
10884 void
10885 vm_follow_map(
10886 vm_map_t map)
10887 {
10888 register vm_map_entry_t entry;
10889
10890 iprintf("task map %08X\n", map);
10891
10892 db_indent += 2;
10893
10894 for (entry = vm_map_first_entry(map);
10895 entry && entry != vm_map_to_entry(map);
10896 entry = entry->vme_next) {
10897 vm_follow_entry(entry);
10898 }
10899
10900 db_indent -= 2;
10901 }
10902
10903 /*
10904 * vm_map_print: [ debug ]
10905 */
10906 void
10907 vm_map_print(
10908 db_addr_t inmap)
10909 {
10910 register vm_map_entry_t entry;
10911 vm_map_t map;
10912 #if TASK_SWAPPER
10913 char *swstate;
10914 #endif /* TASK_SWAPPER */
10915
10916 map = (vm_map_t)(long)
10917 inmap; /* Make sure we have the right type */
10918
10919 iprintf("task map %08X\n", map);
10920
10921 db_indent += 2;
10922
10923 vm_map_header_print(&map->hdr);
10924
10925 iprintf("pmap = %08X size = %08X ref = %d hint = %08X first_free = %08X\n",
10926 map->pmap,
10927 map->size,
10928 map->ref_count,
10929 map->hint,
10930 map->first_free);
10931
10932 iprintf("%swait_for_space, %swiring_required, timestamp = %d\n",
10933 (map->wait_for_space ? "" : "!"),
10934 (map->wiring_required ? "" : "!"),
10935 map->timestamp);
10936
10937 #if TASK_SWAPPER
10938 switch (map->sw_state) {
10939 case MAP_SW_IN:
10940 swstate = "SW_IN";
10941 break;
10942 case MAP_SW_OUT:
10943 swstate = "SW_OUT";
10944 break;
10945 default:
10946 swstate = "????";
10947 break;
10948 }
10949 iprintf("res = %d, sw_state = %s\n", map->res_count, swstate);
10950 #endif /* TASK_SWAPPER */
10951
10952 for (entry = vm_map_first_entry(map);
10953 entry && entry != vm_map_to_entry(map);
10954 entry = entry->vme_next) {
10955 vm_map_entry_print(entry);
10956 }
10957
10958 db_indent -= 2;
10959 }
10960
10961 /*
10962 * Routine: vm_map_copy_print
10963 * Purpose:
10964 * Pretty-print a copy object for ddb.
10965 */
10966
10967 void
10968 vm_map_copy_print(
10969 db_addr_t incopy)
10970 {
10971 vm_map_copy_t copy;
10972 vm_map_entry_t entry;
10973
10974 copy = (vm_map_copy_t)(long)
10975 incopy; /* Make sure we have the right type */
10976
10977 printf("copy object 0x%x\n", copy);
10978
10979 db_indent += 2;
10980
10981 iprintf("type=%d", copy->type);
10982 switch (copy->type) {
10983 case VM_MAP_COPY_ENTRY_LIST:
10984 printf("[entry_list]");
10985 break;
10986
10987 case VM_MAP_COPY_OBJECT:
10988 printf("[object]");
10989 break;
10990
10991 case VM_MAP_COPY_KERNEL_BUFFER:
10992 printf("[kernel_buffer]");
10993 break;
10994
10995 default:
10996 printf("[bad type]");
10997 break;
10998 }
10999 printf(", offset=0x%llx", (unsigned long long)copy->offset);
11000 printf(", size=0x%x\n", copy->size);
11001
11002 switch (copy->type) {
11003 case VM_MAP_COPY_ENTRY_LIST:
11004 vm_map_header_print(&copy->cpy_hdr);
11005 for (entry = vm_map_copy_first_entry(copy);
11006 entry && entry != vm_map_copy_to_entry(copy);
11007 entry = entry->vme_next) {
11008 vm_map_entry_print(entry);
11009 }
11010 break;
11011
11012 case VM_MAP_COPY_OBJECT:
11013 iprintf("object=0x%x\n", copy->cpy_object);
11014 break;
11015
11016 case VM_MAP_COPY_KERNEL_BUFFER:
11017 iprintf("kernel buffer=0x%x", copy->cpy_kdata);
11018 printf(", kalloc_size=0x%x\n", copy->cpy_kalloc_size);
11019 break;
11020
11021 }
11022
11023 db_indent -=2;
11024 }
11025
11026 /*
11027 * db_vm_map_total_size(map) [ debug ]
11028 *
11029 * return the total virtual size (in bytes) of the map
11030 */
11031 vm_map_size_t
11032 db_vm_map_total_size(
11033 db_addr_t inmap)
11034 {
11035 vm_map_entry_t entry;
11036 vm_map_size_t total;
11037 vm_map_t map;
11038
11039 map = (vm_map_t)(long)
11040 inmap; /* Make sure we have the right type */
11041
11042 total = 0;
11043 for (entry = vm_map_first_entry(map);
11044 entry != vm_map_to_entry(map);
11045 entry = entry->vme_next) {
11046 total += entry->vme_end - entry->vme_start;
11047 }
11048
11049 return total;
11050 }
11051
11052 #endif /* MACH_KDB */
11053
11054 /*
11055 * Routine: vm_map_entry_insert
11056 *
11057 * Description: This routine inserts a new vm_map_entry in a locked map.
11058 */
11059 vm_map_entry_t
11060 vm_map_entry_insert(
11061 vm_map_t map,
11062 vm_map_entry_t insp_entry,
11063 vm_map_offset_t start,
11064 vm_map_offset_t end,
11065 vm_object_t object,
11066 vm_object_offset_t offset,
11067 boolean_t needs_copy,
11068 boolean_t is_shared,
11069 boolean_t in_transition,
11070 vm_prot_t cur_protection,
11071 vm_prot_t max_protection,
11072 vm_behavior_t behavior,
11073 vm_inherit_t inheritance,
11074 unsigned wired_count,
11075 boolean_t no_cache,
11076 boolean_t permanent,
11077 unsigned int superpage_size)
11078 {
11079 vm_map_entry_t new_entry;
11080
11081 assert(insp_entry != (vm_map_entry_t)0);
11082
11083 new_entry = vm_map_entry_create(map);
11084
11085 new_entry->vme_start = start;
11086 new_entry->vme_end = end;
11087 assert(page_aligned(new_entry->vme_start));
11088 assert(page_aligned(new_entry->vme_end));
11089
11090 new_entry->object.vm_object = object;
11091 new_entry->offset = offset;
11092 new_entry->is_shared = is_shared;
11093 new_entry->is_sub_map = FALSE;
11094 new_entry->needs_copy = needs_copy;
11095 new_entry->in_transition = in_transition;
11096 new_entry->needs_wakeup = FALSE;
11097 new_entry->inheritance = inheritance;
11098 new_entry->protection = cur_protection;
11099 new_entry->max_protection = max_protection;
11100 new_entry->behavior = behavior;
11101 new_entry->wired_count = wired_count;
11102 new_entry->user_wired_count = 0;
11103 new_entry->use_pmap = FALSE;
11104 new_entry->alias = 0;
11105 new_entry->zero_wired_pages = FALSE;
11106 new_entry->no_cache = no_cache;
11107 new_entry->permanent = permanent;
11108 new_entry->superpage_size = superpage_size;
11109 new_entry->used_for_jit = FALSE;
11110
11111 /*
11112 * Insert the new entry into the list.
11113 */
11114
11115 vm_map_store_entry_link(map, insp_entry, new_entry);
11116 map->size += end - start;
11117
11118 /*
11119 * Update the free space hint and the lookup hint.
11120 */
11121
11122 SAVE_HINT_MAP_WRITE(map, new_entry);
11123 return new_entry;
11124 }
11125
11126 /*
11127 * Routine: vm_map_remap_extract
11128 *
11129 * Description: This routine returns a vm_map_entry list from a map.
11130 */
11131 static kern_return_t
11132 vm_map_remap_extract(
11133 vm_map_t map,
11134 vm_map_offset_t addr,
11135 vm_map_size_t size,
11136 boolean_t copy,
11137 struct vm_map_header *map_header,
11138 vm_prot_t *cur_protection,
11139 vm_prot_t *max_protection,
11140 /* What, no behavior? */
11141 vm_inherit_t inheritance,
11142 boolean_t pageable)
11143 {
11144 kern_return_t result;
11145 vm_map_size_t mapped_size;
11146 vm_map_size_t tmp_size;
11147 vm_map_entry_t src_entry; /* result of last map lookup */
11148 vm_map_entry_t new_entry;
11149 vm_object_offset_t offset;
11150 vm_map_offset_t map_address;
11151 vm_map_offset_t src_start; /* start of entry to map */
11152 vm_map_offset_t src_end; /* end of region to be mapped */
11153 vm_object_t object;
11154 vm_map_version_t version;
11155 boolean_t src_needs_copy;
11156 boolean_t new_entry_needs_copy;
11157
11158 assert(map != VM_MAP_NULL);
11159 assert(size != 0 && size == vm_map_round_page(size));
11160 assert(inheritance == VM_INHERIT_NONE ||
11161 inheritance == VM_INHERIT_COPY ||
11162 inheritance == VM_INHERIT_SHARE);
11163
11164 /*
11165 * Compute start and end of region.
11166 */
11167 src_start = vm_map_trunc_page(addr);
11168 src_end = vm_map_round_page(src_start + size);
11169
11170 /*
11171 * Initialize map_header.
11172 */
11173 map_header->links.next = (struct vm_map_entry *)&map_header->links;
11174 map_header->links.prev = (struct vm_map_entry *)&map_header->links;
11175 map_header->nentries = 0;
11176 map_header->entries_pageable = pageable;
11177
11178 vm_map_store_init( map_header );
11179
11180 *cur_protection = VM_PROT_ALL;
11181 *max_protection = VM_PROT_ALL;
11182
11183 map_address = 0;
11184 mapped_size = 0;
11185 result = KERN_SUCCESS;
11186
11187 /*
11188 * The specified source virtual space might correspond to
11189 * multiple map entries, so we need to loop over them.
11190 */
11191 vm_map_lock(map);
11192 while (mapped_size != size) {
11193 vm_map_size_t entry_size;
11194
11195 /*
11196 * Find the beginning of the region.
11197 */
11198 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
11199 result = KERN_INVALID_ADDRESS;
11200 break;
11201 }
11202
11203 if (src_start < src_entry->vme_start ||
11204 (mapped_size && src_start != src_entry->vme_start)) {
11205 result = KERN_INVALID_ADDRESS;
11206 break;
11207 }
11208
11209 tmp_size = size - mapped_size;
11210 if (src_end > src_entry->vme_end)
11211 tmp_size -= (src_end - src_entry->vme_end);
11212
11213 entry_size = (vm_map_size_t)(src_entry->vme_end -
11214 src_entry->vme_start);
11215
11216 if(src_entry->is_sub_map) {
11217 vm_map_reference(src_entry->object.sub_map);
11218 object = VM_OBJECT_NULL;
11219 } else {
11220 object = src_entry->object.vm_object;
11221
11222 if (object == VM_OBJECT_NULL) {
11223 object = vm_object_allocate(entry_size);
11224 src_entry->offset = 0;
11225 src_entry->object.vm_object = object;
11226 } else if (object->copy_strategy !=
11227 MEMORY_OBJECT_COPY_SYMMETRIC) {
11228 /*
11229 * We are already using an asymmetric
11230 * copy, and therefore we already have
11231 * the right object.
11232 */
11233 assert(!src_entry->needs_copy);
11234 } else if (src_entry->needs_copy || object->shadowed ||
11235 (object->internal && !object->true_share &&
11236 !src_entry->is_shared &&
11237 object->vo_size > entry_size)) {
11238
11239 vm_object_shadow(&src_entry->object.vm_object,
11240 &src_entry->offset,
11241 entry_size);
11242
11243 if (!src_entry->needs_copy &&
11244 (src_entry->protection & VM_PROT_WRITE)) {
11245 vm_prot_t prot;
11246
11247 prot = src_entry->protection & ~VM_PROT_WRITE;
11248
11249 if (override_nx(map, src_entry->alias) && prot)
11250 prot |= VM_PROT_EXECUTE;
11251
11252 if(map->mapped) {
11253 vm_object_pmap_protect(
11254 src_entry->object.vm_object,
11255 src_entry->offset,
11256 entry_size,
11257 PMAP_NULL,
11258 src_entry->vme_start,
11259 prot);
11260 } else {
11261 pmap_protect(vm_map_pmap(map),
11262 src_entry->vme_start,
11263 src_entry->vme_end,
11264 prot);
11265 }
11266 }
11267
11268 object = src_entry->object.vm_object;
11269 src_entry->needs_copy = FALSE;
11270 }
11271
11272
11273 vm_object_lock(object);
11274 vm_object_reference_locked(object); /* object ref. for new entry */
11275 if (object->copy_strategy ==
11276 MEMORY_OBJECT_COPY_SYMMETRIC) {
11277 object->copy_strategy =
11278 MEMORY_OBJECT_COPY_DELAY;
11279 }
11280 vm_object_unlock(object);
11281 }
11282
11283 offset = src_entry->offset + (src_start - src_entry->vme_start);
11284
11285 new_entry = _vm_map_entry_create(map_header);
11286 vm_map_entry_copy(new_entry, src_entry);
11287 new_entry->use_pmap = FALSE; /* clr address space specifics */
11288
11289 new_entry->vme_start = map_address;
11290 new_entry->vme_end = map_address + tmp_size;
11291 new_entry->inheritance = inheritance;
11292 new_entry->offset = offset;
11293
11294 /*
11295 * The new region has to be copied now if required.
11296 */
11297 RestartCopy:
11298 if (!copy) {
11299 src_entry->is_shared = TRUE;
11300 new_entry->is_shared = TRUE;
11301 if (!(new_entry->is_sub_map))
11302 new_entry->needs_copy = FALSE;
11303
11304 } else if (src_entry->is_sub_map) {
11305 /* make this a COW sub_map if not already */
11306 new_entry->needs_copy = TRUE;
11307 object = VM_OBJECT_NULL;
11308 } else if (src_entry->wired_count == 0 &&
11309 vm_object_copy_quickly(&new_entry->object.vm_object,
11310 new_entry->offset,
11311 (new_entry->vme_end -
11312 new_entry->vme_start),
11313 &src_needs_copy,
11314 &new_entry_needs_copy)) {
11315
11316 new_entry->needs_copy = new_entry_needs_copy;
11317 new_entry->is_shared = FALSE;
11318
11319 /*
11320 * Handle copy_on_write semantics.
11321 */
11322 if (src_needs_copy && !src_entry->needs_copy) {
11323 vm_prot_t prot;
11324
11325 prot = src_entry->protection & ~VM_PROT_WRITE;
11326
11327 if (override_nx(map, src_entry->alias) && prot)
11328 prot |= VM_PROT_EXECUTE;
11329
11330 vm_object_pmap_protect(object,
11331 offset,
11332 entry_size,
11333 ((src_entry->is_shared
11334 || map->mapped) ?
11335 PMAP_NULL : map->pmap),
11336 src_entry->vme_start,
11337 prot);
11338
11339 src_entry->needs_copy = TRUE;
11340 }
11341 /*
11342 * Throw away the old object reference of the new entry.
11343 */
11344 vm_object_deallocate(object);
11345
11346 } else {
11347 new_entry->is_shared = FALSE;
11348
11349 /*
11350 * The map can be safely unlocked since we
11351 * already hold a reference on the object.
11352 *
11353 * Record the timestamp of the map for later
11354 * verification, and unlock the map.
11355 */
11356 version.main_timestamp = map->timestamp;
11357 vm_map_unlock(map); /* Increments timestamp once! */
11358
11359 /*
11360 * Perform the copy.
11361 */
11362 if (src_entry->wired_count > 0) {
11363 vm_object_lock(object);
11364 result = vm_object_copy_slowly(
11365 object,
11366 offset,
11367 entry_size,
11368 THREAD_UNINT,
11369 &new_entry->object.vm_object);
11370
11371 new_entry->offset = 0;
11372 new_entry->needs_copy = FALSE;
11373 } else {
11374 result = vm_object_copy_strategically(
11375 object,
11376 offset,
11377 entry_size,
11378 &new_entry->object.vm_object,
11379 &new_entry->offset,
11380 &new_entry_needs_copy);
11381
11382 new_entry->needs_copy = new_entry_needs_copy;
11383 }
11384
11385 /*
11386 * Throw away the old object reference of the new entry.
11387 */
11388 vm_object_deallocate(object);
11389
11390 if (result != KERN_SUCCESS &&
11391 result != KERN_MEMORY_RESTART_COPY) {
11392 _vm_map_entry_dispose(map_header, new_entry);
11393 break;
11394 }
11395
11396 /*
11397 * Verify that the map has not substantially
11398 * changed while the copy was being made.
11399 */
11400
11401 vm_map_lock(map);
11402 if (version.main_timestamp + 1 != map->timestamp) {
11403 /*
11404 * Simple version comparison failed.
11405 *
11406 * Retry the lookup and verify that the
11407 * same object/offset are still present.
11408 */
11409 vm_object_deallocate(new_entry->
11410 object.vm_object);
11411 _vm_map_entry_dispose(map_header, new_entry);
11412 if (result == KERN_MEMORY_RESTART_COPY)
11413 result = KERN_SUCCESS;
11414 continue;
11415 }
11416
11417 if (result == KERN_MEMORY_RESTART_COPY) {
11418 vm_object_reference(object);
11419 goto RestartCopy;
11420 }
11421 }
11422
11423 _vm_map_store_entry_link(map_header,
11424 map_header->links.prev, new_entry);
11425
11426 /*Protections for submap mapping are irrelevant here*/
11427 if( !src_entry->is_sub_map ) {
11428 *cur_protection &= src_entry->protection;
11429 *max_protection &= src_entry->max_protection;
11430 }
11431 map_address += tmp_size;
11432 mapped_size += tmp_size;
11433 src_start += tmp_size;
11434
11435 } /* end while */
11436
11437 vm_map_unlock(map);
11438 if (result != KERN_SUCCESS) {
11439 /*
11440 * Free all allocated elements.
11441 */
11442 for (src_entry = map_header->links.next;
11443 src_entry != (struct vm_map_entry *)&map_header->links;
11444 src_entry = new_entry) {
11445 new_entry = src_entry->vme_next;
11446 _vm_map_store_entry_unlink(map_header, src_entry);
11447 vm_object_deallocate(src_entry->object.vm_object);
11448 _vm_map_entry_dispose(map_header, src_entry);
11449 }
11450 }
11451 return result;
11452 }
11453
11454 /*
11455 * Routine: vm_remap
11456 *
11457 * Map portion of a task's address space.
11458 * Mapped region must not overlap more than
11459 * one vm memory object. Protections and
11460 * inheritance attributes remain the same
11461 * as in the original task and are out parameters.
11462 * Source and target tasks can be identical.
11463 * Other attributes are the same as for vm_map().
11464 */
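/*
 * Hedged sketch of a typical client (hypothetical variable names):
 * the mach_vm_remap() user interface funnels into this routine, e.g.
 * to alias part of another task's address space:
 *
 *	mach_vm_address_t target_addr = 0;
 *	vm_prot_t cur, max;
 *	kr = mach_vm_remap(mach_task_self(), &target_addr, len, 0,
 *			   VM_FLAGS_ANYWHERE, src_task, src_addr,
 *			   FALSE, &cur, &max, VM_INHERIT_NONE);
 *
 * where copy == FALSE shares the pages rather than copying them.
 */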
11465 kern_return_t
11466 vm_map_remap(
11467 vm_map_t target_map,
11468 vm_map_address_t *address,
11469 vm_map_size_t size,
11470 vm_map_offset_t mask,
11471 int flags,
11472 vm_map_t src_map,
11473 vm_map_offset_t memory_address,
11474 boolean_t copy,
11475 vm_prot_t *cur_protection,
11476 vm_prot_t *max_protection,
11477 vm_inherit_t inheritance)
11478 {
11479 kern_return_t result;
11480 vm_map_entry_t entry;
11481 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
11482 vm_map_entry_t new_entry;
11483 struct vm_map_header map_header;
11484
11485 if (target_map == VM_MAP_NULL)
11486 return KERN_INVALID_ARGUMENT;
11487
11488 switch (inheritance) {
11489 case VM_INHERIT_NONE:
11490 case VM_INHERIT_COPY:
11491 case VM_INHERIT_SHARE:
11492 if (size != 0 && src_map != VM_MAP_NULL)
11493 break;
11494 /*FALL THRU*/
11495 default:
11496 return KERN_INVALID_ARGUMENT;
11497 }
11498
11499 size = vm_map_round_page(size);
11500
11501 result = vm_map_remap_extract(src_map, memory_address,
11502 size, copy, &map_header,
11503 cur_protection,
11504 max_protection,
11505 inheritance,
11506 target_map->hdr.
11507 entries_pageable);
11508
11509 if (result != KERN_SUCCESS) {
11510 return result;
11511 }
11512
11513 /*
11514 * Allocate/check a range of free virtual address
11515 * space for the target
11516 */
11517 *address = vm_map_trunc_page(*address);
11518 vm_map_lock(target_map);
11519 result = vm_map_remap_range_allocate(target_map, address, size,
11520 mask, flags, &insp_entry);
11521
11522 for (entry = map_header.links.next;
11523 entry != (struct vm_map_entry *)&map_header.links;
11524 entry = new_entry) {
11525 new_entry = entry->vme_next;
11526 _vm_map_store_entry_unlink(&map_header, entry);
11527 if (result == KERN_SUCCESS) {
11528 entry->vme_start += *address;
11529 entry->vme_end += *address;
11530 vm_map_store_entry_link(target_map, insp_entry, entry);
11531 insp_entry = entry;
11532 } else {
11533 if (!entry->is_sub_map) {
11534 vm_object_deallocate(entry->object.vm_object);
11535 } else {
11536 vm_map_deallocate(entry->object.sub_map);
11537 }
11538 _vm_map_entry_dispose(&map_header, entry);
11539 }
11540 }
11541
11542 if( target_map->disable_vmentry_reuse == TRUE) {
11543 if( target_map->highest_entry_end < insp_entry->vme_end ){
11544 target_map->highest_entry_end = insp_entry->vme_end;
11545 }
11546 }
11547
11548 if (result == KERN_SUCCESS) {
11549 target_map->size += size;
11550 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
11551 }
11552 vm_map_unlock(target_map);
11553
11554 if (result == KERN_SUCCESS && target_map->wiring_required)
11555 result = vm_map_wire(target_map, *address,
11556 *address + size, *cur_protection, TRUE);
11557 return result;
11558 }
11559
11560 /*
11561 * Routine: vm_map_remap_range_allocate
11562 *
11563 * Description:
11564 * Allocate a range in the specified virtual address map.
11565 * Returns the address and the map entry just before the allocated
11566 * range
11567 *
11568 * Map must be locked.
11569 */
11570
11571 static kern_return_t
11572 vm_map_remap_range_allocate(
11573 vm_map_t map,
11574 vm_map_address_t *address, /* IN/OUT */
11575 vm_map_size_t size,
11576 vm_map_offset_t mask,
11577 int flags,
11578 vm_map_entry_t *map_entry) /* OUT */
11579 {
11580 vm_map_entry_t entry;
11581 vm_map_offset_t start;
11582 vm_map_offset_t end;
11583 kern_return_t kr;
11584
11585 StartAgain: ;
11586
11587 start = *address;
11588
11589 if (flags & VM_FLAGS_ANYWHERE)
11590 {
11591 /*
11592 * Calculate the first possible address.
11593 */
11594
11595 if (start < map->min_offset)
11596 start = map->min_offset;
11597 if (start > map->max_offset)
11598 return(KERN_NO_SPACE);
11599
11600 /*
11601 * Look for the first possible address;
11602 * if there's already something at this
11603 * address, we have to start after it.
11604 */
11605
11606 if( map->disable_vmentry_reuse == TRUE) {
11607 VM_MAP_HIGHEST_ENTRY(map, entry, start);
11608 } else {
11609 assert(first_free_is_valid(map));
11610 if (start == map->min_offset) {
11611 if ((entry = map->first_free) != vm_map_to_entry(map))
11612 start = entry->vme_end;
11613 } else {
11614 vm_map_entry_t tmp_entry;
11615 if (vm_map_lookup_entry(map, start, &tmp_entry))
11616 start = tmp_entry->vme_end;
11617 entry = tmp_entry;
11618 }
11619 }
11620
11621 /*
11622 * In any case, the "entry" always precedes
11623 * the proposed new region throughout the
11624 * loop:
11625 */
11626
11627 while (TRUE) {
11628 register vm_map_entry_t next;
11629
11630 /*
11631 * Find the end of the proposed new region.
11632 * Be sure we didn't go beyond the end, or
11633 * wrap around the address.
11634 */
11635
11636 end = ((start + mask) & ~mask);
11637 if (end < start)
11638 return(KERN_NO_SPACE);
11639 start = end;
11640 end += size;
11641
11642 if ((end > map->max_offset) || (end < start)) {
11643 if (map->wait_for_space) {
11644 if (size <= (map->max_offset -
11645 map->min_offset)) {
11646 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
11647 vm_map_unlock(map);
11648 thread_block(THREAD_CONTINUE_NULL);
11649 vm_map_lock(map);
11650 goto StartAgain;
11651 }
11652 }
11653
11654 return(KERN_NO_SPACE);
11655 }
11656
11657 /*
11658 * If there are no more entries, we must win.
11659 */
11660
11661 next = entry->vme_next;
11662 if (next == vm_map_to_entry(map))
11663 break;
11664
11665 /*
11666 * If there is another entry, it must be
11667 * after the end of the potential new region.
11668 */
11669
11670 if (next->vme_start >= end)
11671 break;
11672
11673 /*
11674 * Didn't fit -- move to the next entry.
11675 */
11676
11677 entry = next;
11678 start = entry->vme_end;
11679 }
11680 *address = start;
11681 } else {
11682 vm_map_entry_t temp_entry;
11683
11684 /*
11685 * Verify that:
11686 * the address doesn't itself violate
11687 * the mask requirement.
11688 */
11689
11690 if ((start & mask) != 0)
11691 return(KERN_NO_SPACE);
11692
11693
11694 /*
11695 * ... the address is within bounds
11696 */
11697
11698 end = start + size;
11699
11700 if ((start < map->min_offset) ||
11701 (end > map->max_offset) ||
11702 (start >= end)) {
11703 return(KERN_INVALID_ADDRESS);
11704 }
11705
11706 /*
11707 * If we're asked to overwrite whatever was mapped in that
11708 * range, first deallocate that range.
11709 */
11710 if (flags & VM_FLAGS_OVERWRITE) {
11711 vm_map_t zap_map;
11712
11713 /*
11714 * We use a "zap_map" to avoid having to unlock
11715 * the "map" in vm_map_delete(), which would compromise
11716 * the atomicity of the "deallocate" and then "remap"
11717 * combination.
11718 */
11719 zap_map = vm_map_create(PMAP_NULL,
11720 start,
11721 end - start,
11722 map->hdr.entries_pageable);
11723 if (zap_map == VM_MAP_NULL) {
11724 return KERN_RESOURCE_SHORTAGE;
11725 }
11726
11727 kr = vm_map_delete(map, start, end,
11728 VM_MAP_REMOVE_SAVE_ENTRIES,
11729 zap_map);
11730 if (kr == KERN_SUCCESS) {
11731 vm_map_destroy(zap_map,
11732 VM_MAP_REMOVE_NO_PMAP_CLEANUP);
11733 zap_map = VM_MAP_NULL;
11734 }
11735 }
11736
11737 /*
11738 * ... the starting address isn't allocated
11739 */
11740
11741 if (vm_map_lookup_entry(map, start, &temp_entry))
11742 return(KERN_NO_SPACE);
11743
11744 entry = temp_entry;
11745
11746 /*
11747 * ... the next region doesn't overlap the
11748 * end point.
11749 */
11750
11751 if ((entry->vme_next != vm_map_to_entry(map)) &&
11752 (entry->vme_next->vme_start < end))
11753 return(KERN_NO_SPACE);
11754 }
11755 *map_entry = entry;
11756 return(KERN_SUCCESS);
11757 }
11758
11759 /*
11760 * vm_map_switch:
11761 *
11762 * Set the address map for the current thread to the specified map
11763 */
11764
11765 vm_map_t
11766 vm_map_switch(
11767 vm_map_t map)
11768 {
11769 int mycpu;
11770 thread_t thread = current_thread();
11771 vm_map_t oldmap = thread->map;
11772
11773 mp_disable_preemption();
11774 mycpu = cpu_number();
11775
11776 /*
11777 * Deactivate the current map and activate the requested map
11778 */
11779 PMAP_SWITCH_USER(thread, map, mycpu);
11780
11781 mp_enable_preemption();
11782 return(oldmap);
11783 }
11784
11785
11786 /*
11787 * Routine: vm_map_write_user
11788 *
11789 * Description:
11790 * Copy out data from kernel space into space in the
11791 * destination map. The space must already exist in the
11792 * destination map.
11793 * NOTE: This routine should only be called by threads
11794 * which can block on a page fault, i.e. kernel-mode user
11795 * threads.
11796 *
11797 */
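/*
 * Minimal hedged sketch (hypothetical names "task_map", "uaddr",
 * "results"), assuming the destination range already exists and is
 * writable in the target map:
 *
 *	kr = vm_map_write_user(task_map, &results, uaddr,
 *			       sizeof(results));
 *	if (kr != KERN_SUCCESS)
 *		return kr;
 *
 * where KERN_INVALID_ADDRESS indicates the copyout faulted.
 */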
11798 kern_return_t
11799 vm_map_write_user(
11800 vm_map_t map,
11801 void *src_p,
11802 vm_map_address_t dst_addr,
11803 vm_size_t size)
11804 {
11805 kern_return_t kr = KERN_SUCCESS;
11806
11807 if(current_map() == map) {
11808 if (copyout(src_p, dst_addr, size)) {
11809 kr = KERN_INVALID_ADDRESS;
11810 }
11811 } else {
11812 vm_map_t oldmap;
11813
11814 /* take on the identity of the target map while doing */
11815 /* the transfer */
11816
11817 vm_map_reference(map);
11818 oldmap = vm_map_switch(map);
11819 if (copyout(src_p, dst_addr, size)) {
11820 kr = KERN_INVALID_ADDRESS;
11821 }
11822 vm_map_switch(oldmap);
11823 vm_map_deallocate(map);
11824 }
11825 return kr;
11826 }
11827
11828 /*
11829 * Routine: vm_map_read_user
11830 *
11831 * Description:
11832 * Copy in data from a user space source map into the
11833 * kernel map. The space must already exist in the
11834 * kernel map.
11835 * NOTE: This routine should only be called by threads
11836 * which can block on a page fault, i.e. kernel-mode user
11837 * threads.
11838 *
11839 */
11840 kern_return_t
11841 vm_map_read_user(
11842 vm_map_t map,
11843 vm_map_address_t src_addr,
11844 void *dst_p,
11845 vm_size_t size)
11846 {
11847 kern_return_t kr = KERN_SUCCESS;
11848
11849 if(current_map() == map) {
11850 if (copyin(src_addr, dst_p, size)) {
11851 kr = KERN_INVALID_ADDRESS;
11852 }
11853 } else {
11854 vm_map_t oldmap;
11855
11856 /* take on the identity of the target map while doing */
11857 /* the transfer */
11858
11859 vm_map_reference(map);
11860 oldmap = vm_map_switch(map);
11861 if (copyin(src_addr, dst_p, size)) {
11862 kr = KERN_INVALID_ADDRESS;
11863 }
11864 vm_map_switch(oldmap);
11865 vm_map_deallocate(map);
11866 }
11867 return kr;
11868 }
11869
11870
11871 /*
11872 * vm_map_check_protection:
11873 *
11874 * Assert that the target map allows the specified
11875 * privilege on the entire address region given.
11876 * The entire region must be allocated.
11877 */
11878 boolean_t
11879 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
11880 vm_map_offset_t end, vm_prot_t protection)
11881 {
11882 vm_map_entry_t entry;
11883 vm_map_entry_t tmp_entry;
11884
11885 vm_map_lock(map);
11886
11887 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
11888 {
11889 vm_map_unlock(map);
11890 return (FALSE);
11891 }
11892
11893 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11894 vm_map_unlock(map);
11895 return(FALSE);
11896 }
11897
11898 entry = tmp_entry;
11899
11900 while (start < end) {
11901 if (entry == vm_map_to_entry(map)) {
11902 vm_map_unlock(map);
11903 return(FALSE);
11904 }
11905
11906 /*
11907 * No holes allowed!
11908 */
11909
11910 if (start < entry->vme_start) {
11911 vm_map_unlock(map);
11912 return(FALSE);
11913 }
11914
11915 /*
11916 * Check protection associated with entry.
11917 */
11918
11919 if ((entry->protection & protection) != protection) {
11920 vm_map_unlock(map);
11921 return(FALSE);
11922 }
11923
11924 /* go to next entry */
11925
11926 start = entry->vme_end;
11927 entry = entry->vme_next;
11928 }
11929 vm_map_unlock(map);
11930 return(TRUE);
11931 }
11932
11933 kern_return_t
11934 vm_map_purgable_control(
11935 vm_map_t map,
11936 vm_map_offset_t address,
11937 vm_purgable_t control,
11938 int *state)
11939 {
11940 vm_map_entry_t entry;
11941 vm_object_t object;
11942 kern_return_t kr;
11943
11944 /*
11945 * Vet all the input parameters and current type and state of the
11946 * underlying object. Return with an error if anything is amiss.
11947 */
11948 if (map == VM_MAP_NULL)
11949 return(KERN_INVALID_ARGUMENT);
11950
11951 if (control != VM_PURGABLE_SET_STATE &&
11952 control != VM_PURGABLE_GET_STATE &&
11953 control != VM_PURGABLE_PURGE_ALL)
11954 return(KERN_INVALID_ARGUMENT);
11955
11956 if (control == VM_PURGABLE_PURGE_ALL) {
11957 vm_purgeable_object_purge_all();
11958 return KERN_SUCCESS;
11959 }
11960
11961 if (control == VM_PURGABLE_SET_STATE &&
11962 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
11963 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
11964 return(KERN_INVALID_ARGUMENT);
11965
11966 vm_map_lock_read(map);
11967
11968 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
11969
11970 /*
11971 * Must pass a valid non-submap address.
11972 */
11973 vm_map_unlock_read(map);
11974 return(KERN_INVALID_ADDRESS);
11975 }
11976
11977 if ((entry->protection & VM_PROT_WRITE) == 0) {
11978 /*
11979 * Can't apply purgable controls to something you can't write.
11980 */
11981 vm_map_unlock_read(map);
11982 return(KERN_PROTECTION_FAILURE);
11983 }
11984
11985 object = entry->object.vm_object;
11986 if (object == VM_OBJECT_NULL) {
11987 /*
11988 * Object must already be present or it can't be purgable.
11989 */
11990 vm_map_unlock_read(map);
11991 return KERN_INVALID_ARGUMENT;
11992 }
11993
11994 vm_object_lock(object);
11995
11996 if (entry->offset != 0 ||
11997 entry->vme_end - entry->vme_start != object->vo_size) {
11998 /*
11999 * Can only apply purgable controls to the whole (existing)
12000 * object at once.
12001 */
12002 vm_map_unlock_read(map);
12003 vm_object_unlock(object);
12004 return KERN_INVALID_ARGUMENT;
12005 }
12006
12007 vm_map_unlock_read(map);
12008
12009 kr = vm_object_purgable_control(object, control, state);
12010
12011 vm_object_unlock(object);
12012
12013 return kr;
12014 }
12015
12016 kern_return_t
12017 vm_map_page_query_internal(
12018 vm_map_t target_map,
12019 vm_map_offset_t offset,
12020 int *disposition,
12021 int *ref_count)
12022 {
12023 kern_return_t kr;
12024 vm_page_info_basic_data_t info;
12025 mach_msg_type_number_t count;
12026
12027 count = VM_PAGE_INFO_BASIC_COUNT;
12028 kr = vm_map_page_info(target_map,
12029 offset,
12030 VM_PAGE_INFO_BASIC,
12031 (vm_page_info_t) &info,
12032 &count);
12033 if (kr == KERN_SUCCESS) {
12034 *disposition = info.disposition;
12035 *ref_count = info.ref_count;
12036 } else {
12037 *disposition = 0;
12038 *ref_count = 0;
12039 }
12040
12041 return kr;
12042 }
12043
12044 kern_return_t
12045 vm_map_page_info(
12046 vm_map_t map,
12047 vm_map_offset_t offset,
12048 vm_page_info_flavor_t flavor,
12049 vm_page_info_t info,
12050 mach_msg_type_number_t *count)
12051 {
12052 vm_map_entry_t map_entry;
12053 vm_object_t object;
12054 vm_page_t m;
12055 kern_return_t kr;
12056 kern_return_t retval = KERN_SUCCESS;
12057 boolean_t top_object;
12058 int disposition;
12059 int ref_count;
12060 vm_object_id_t object_id;
12061 vm_page_info_basic_t basic_info;
12062 int depth;
12063 vm_map_offset_t offset_in_page;
12064
12065 switch (flavor) {
12066 case VM_PAGE_INFO_BASIC:
12067 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
12068 /*
12069 * The "vm_page_info_basic_data" structure was not
12070 * properly padded, so allow the size to be off by
12071 * one to maintain backwards binary compatibility...
12072 */
12073 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1)
12074 return KERN_INVALID_ARGUMENT;
12075 }
12076 break;
12077 default:
12078 return KERN_INVALID_ARGUMENT;
12079 }
12080
12081 disposition = 0;
12082 ref_count = 0;
12083 object_id = 0;
12084 top_object = TRUE;
12085 depth = 0;
12086
12087 retval = KERN_SUCCESS;
12088 offset_in_page = offset & PAGE_MASK;
12089 offset = vm_map_trunc_page(offset);
12090
12091 vm_map_lock_read(map);
12092
12093 /*
12094 * First, find the map entry covering "offset", going down
12095 * submaps if necessary.
12096 */
12097 for (;;) {
12098 if (!vm_map_lookup_entry(map, offset, &map_entry)) {
12099 vm_map_unlock_read(map);
12100 return KERN_INVALID_ADDRESS;
12101 }
12102 /* compute offset from this map entry's start */
12103 offset -= map_entry->vme_start;
12104 /* compute offset into this map entry's object (or submap) */
12105 offset += map_entry->offset;
12106
12107 if (map_entry->is_sub_map) {
12108 vm_map_t sub_map;
12109
12110 sub_map = map_entry->object.sub_map;
12111 vm_map_lock_read(sub_map);
12112 vm_map_unlock_read(map);
12113
12114 map = sub_map;
12115
12116 ref_count = MAX(ref_count, map->ref_count);
12117 continue;
12118 }
12119 break;
12120 }
12121
12122 object = map_entry->object.vm_object;
12123 if (object == VM_OBJECT_NULL) {
12124 /* no object -> no page */
12125 vm_map_unlock_read(map);
12126 goto done;
12127 }
12128
12129 vm_object_lock(object);
12130 vm_map_unlock_read(map);
12131
12132 /*
12133 * Go down the VM object shadow chain until we find the page
12134 * we're looking for.
12135 */
12136 for (;;) {
12137 ref_count = MAX(ref_count, object->ref_count);
12138
12139 m = vm_page_lookup(object, offset);
12140
12141 if (m != VM_PAGE_NULL) {
12142 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
12143 break;
12144 } else {
12145 #if MACH_PAGEMAP
12146 if (object->existence_map) {
12147 if (vm_external_state_get(object->existence_map,
12148 offset) ==
12149 VM_EXTERNAL_STATE_EXISTS) {
12150 /*
12151 * this page has been paged out
12152 */
12153 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
12154 break;
12155 }
12156 } else
12157 #endif
12158 {
12159 if (object->internal &&
12160 object->alive &&
12161 !object->terminating &&
12162 object->pager_ready) {
12163
12164 memory_object_t pager;
12165
12166 vm_object_paging_begin(object);
12167 pager = object->pager;
12168 vm_object_unlock(object);
12169
12170 /*
12171 * Ask the default pager if
12172 * it has this page.
12173 */
12174 kr = memory_object_data_request(
12175 pager,
12176 offset + object->paging_offset,
12177 0, /* just poke the pager */
12178 VM_PROT_READ,
12179 NULL);
12180
12181 vm_object_lock(object);
12182 vm_object_paging_end(object);
12183
12184 if (kr == KERN_SUCCESS) {
12185 /* the default pager has it */
12186 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
12187 break;
12188 }
12189 }
12190 }
12191
12192 if (object->shadow != VM_OBJECT_NULL) {
12193 vm_object_t shadow;
12194
12195 offset += object->vo_shadow_offset;
12196 shadow = object->shadow;
12197
12198 vm_object_lock(shadow);
12199 vm_object_unlock(object);
12200
12201 object = shadow;
12202 top_object = FALSE;
12203 depth++;
12204 } else {
12205 // if (!object->internal)
12206 // break;
12207 // retval = KERN_FAILURE;
12208 // goto done_with_object;
12209 break;
12210 }
12211 }
12212 }
12213 /* The ref_count is not strictly accurate: it measures the number */
12214 /* of entities holding a ref on the object; they may not be mapping */
12215 /* the object, or may not be mapping the section holding the */
12216 /* target page. But it is still a ballpark number and, though an */
12217 /* overcount, it picks up the copy-on-write cases. */
12218
12219 /* We could also get a picture of page sharing from pmap_attributes, */
12220 /* but this would undercount, as only faulted-in mappings would */
12221 /* show up. */
12222
12223 if (top_object == TRUE && object->shadow)
12224 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
12225
12226 if (! object->internal)
12227 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
12228
12229 if (m == VM_PAGE_NULL)
12230 goto done_with_object;
12231
12232 if (m->fictitious) {
12233 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
12234 goto done_with_object;
12235 }
12236 if (m->dirty || pmap_is_modified(m->phys_page))
12237 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
12238
12239 if (m->reference || pmap_is_referenced(m->phys_page))
12240 disposition |= VM_PAGE_QUERY_PAGE_REF;
12241
12242 if (m->speculative)
12243 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
12244
12245 if (m->cs_validated)
12246 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
12247 if (m->cs_tainted)
12248 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
12249
12250 done_with_object:
12251 vm_object_unlock(object);
12252 done:
12253
12254 switch (flavor) {
12255 case VM_PAGE_INFO_BASIC:
12256 basic_info = (vm_page_info_basic_t) info;
12257 basic_info->disposition = disposition;
12258 basic_info->ref_count = ref_count;
12259 basic_info->object_id = (vm_object_id_t) (uintptr_t) object;
12260 basic_info->offset =
12261 (memory_object_offset_t) offset + offset_in_page;
12262 basic_info->depth = depth;
12263 break;
12264 }
12265
12266 return retval;
12267 }
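/*
 * Illustrative sketch (not part of this file, not compiled): calling
 * vm_map_page_info() directly with the VM_PAGE_INFO_BASIC flavor and
 * interpreting the result. "depth" counts how many shadow objects were
 * traversed before the page was found, so a non-zero depth suggests the
 * page is still provided by a backing object further down the chain
 * (e.g. shared copy-on-write). The helper example_describe_page() is
 * hypothetical.
 */
#if 0
static void
example_describe_page(
	vm_map_t	map,
	vm_map_offset_t	addr)
{
	vm_page_info_basic_data_t	info;
	mach_msg_type_number_t		count = VM_PAGE_INFO_BASIC_COUNT;

	if (vm_map_page_info(map, addr, VM_PAGE_INFO_BASIC,
			     (vm_page_info_t) &info, &count) != KERN_SUCCESS)
		return;

	if (info.disposition & VM_PAGE_QUERY_PAGE_PRESENT)
		printf("page resident, depth %d, approx refs %d\n",
		       info.depth, info.ref_count);
	else if (info.disposition & VM_PAGE_QUERY_PAGE_PAGED_OUT)
		printf("page paged out\n");
	else
		printf("page absent\n");
}
#endif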
12268
12269 /*
12270 * vm_map_msync
12271 *
12272 * Synchronises the specified memory range with its backing store
12273 * image by either flushing or cleaning the contents out to the
12274 * appropriate memory manager, engaging in a memory object synchronize
12275 * dialog with that manager. The client doesn't return until the
12276 * manager issues an m_o_s_completed message. MIG magically converts
12277 * the user task parameter to the task's address map.
12278 *
12279 * interpretation of sync_flags
12280 * VM_SYNC_INVALIDATE - discard pages, only return precious
12281 * pages to manager.
12282 *
12283 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
12284 * - discard pages, write dirty or precious
12285 * pages back to memory manager.
12286 *
12287 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
12288 * - write dirty or precious pages back to
12289 * the memory manager.
12290 *
12291 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
12292 * is a hole in the region, and we would
12293 * have returned KERN_SUCCESS, return
12294 * KERN_INVALID_ADDRESS instead.
12295 *
12296 * NOTE
12297 * The memory object attributes have not yet been implemented, so this
12298 * function will have to deal with the invalidate attribute.
12299 *
12300 * RETURNS
12301 * KERN_INVALID_TASK Bad task parameter
12302 * KERN_INVALID_ARGUMENT both sync and async were specified.
12303 * KERN_SUCCESS The usual.
12304 * KERN_INVALID_ADDRESS There was a hole in the region.
12305 */
12306
12307 kern_return_t
12308 vm_map_msync(
12309 vm_map_t map,
12310 vm_map_address_t address,
12311 vm_map_size_t size,
12312 vm_sync_t sync_flags)
12313 {
12314 msync_req_t msr;
12315 msync_req_t new_msr;
12316 queue_chain_t req_q; /* queue of requests for this msync */
12317 vm_map_entry_t entry;
12318 vm_map_size_t amount_left;
12319 vm_object_offset_t offset;
12320 boolean_t do_sync_req;
12321 boolean_t had_hole = FALSE;
12322 memory_object_t pager;
12323
12324 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
12325 (sync_flags & VM_SYNC_SYNCHRONOUS))
12326 return(KERN_INVALID_ARGUMENT);
12327
12328 /*
12329 * align address and size on page boundaries
12330 */
12331 size = vm_map_round_page(address + size) - vm_map_trunc_page(address);
12332 address = vm_map_trunc_page(address);
12333
12334 if (map == VM_MAP_NULL)
12335 return(KERN_INVALID_TASK);
12336
12337 if (size == 0)
12338 return(KERN_SUCCESS);
12339
12340 queue_init(&req_q);
12341 amount_left = size;
12342
12343 while (amount_left > 0) {
12344 vm_object_size_t flush_size;
12345 vm_object_t object;
12346
12347 vm_map_lock(map);
12348 if (!vm_map_lookup_entry(map,
12349 vm_map_trunc_page(address), &entry)) {
12350
12351 vm_map_size_t skip;
12352
12353 /*
12354 * hole in the address map.
12355 */
12356 had_hole = TRUE;
12357
12358 /*
12359 * Check for empty map.
12360 */
12361 if (entry == vm_map_to_entry(map) &&
12362 entry->vme_next == entry) {
12363 vm_map_unlock(map);
12364 break;
12365 }
12366 /*
12367 * Check that we don't wrap and that
12368 * we have at least one real map entry.
12369 */
12370 if ((map->hdr.nentries == 0) ||
12371 (entry->vme_next->vme_start < address)) {
12372 vm_map_unlock(map);
12373 break;
12374 }
12375 /*
12376 * Move up to the next entry if needed
12377 */
12378 skip = (entry->vme_next->vme_start - address);
12379 if (skip >= amount_left)
12380 amount_left = 0;
12381 else
12382 amount_left -= skip;
12383 address = entry->vme_next->vme_start;
12384 vm_map_unlock(map);
12385 continue;
12386 }
12387
12388 offset = address - entry->vme_start;
12389
12390 /*
12391 * do we have more to flush than is contained in this
12392 * entry ?
12393 */
12394 if (amount_left + entry->vme_start + offset > entry->vme_end) {
12395 flush_size = entry->vme_end -
12396 (entry->vme_start + offset);
12397 } else {
12398 flush_size = amount_left;
12399 }
12400 amount_left -= flush_size;
12401 address += flush_size;
12402
12403 if (entry->is_sub_map == TRUE) {
12404 vm_map_t local_map;
12405 vm_map_offset_t local_offset;
12406
12407 local_map = entry->object.sub_map;
12408 local_offset = entry->offset;
12409 vm_map_unlock(map);
12410 if (vm_map_msync(
12411 local_map,
12412 local_offset,
12413 flush_size,
12414 sync_flags) == KERN_INVALID_ADDRESS) {
12415 had_hole = TRUE;
12416 }
12417 continue;
12418 }
12419 object = entry->object.vm_object;
12420
12421 /*
12422 * We can't sync this object if the object has not been
12423 * created yet
12424 */
12425 if (object == VM_OBJECT_NULL) {
12426 vm_map_unlock(map);
12427 continue;
12428 }
12429 offset += entry->offset;
12430
12431 vm_object_lock(object);
12432
12433 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
12434 int kill_pages = 0;
12435 boolean_t reusable_pages = FALSE;
12436
12437 if (sync_flags & VM_SYNC_KILLPAGES) {
12438 if (object->ref_count == 1 && !object->shadow)
12439 kill_pages = 1;
12440 else
12441 kill_pages = -1;
12442 }
12443 if (kill_pages != -1)
12444 vm_object_deactivate_pages(object, offset,
12445 (vm_object_size_t)flush_size, kill_pages, reusable_pages);
12446 vm_object_unlock(object);
12447 vm_map_unlock(map);
12448 continue;
12449 }
12450 /*
12451 * We can't sync this object if there isn't a pager.
12452 * Don't bother to sync internal objects, since there can't
12453 * be any "permanent" storage for these objects anyway.
12454 */
12455 if ((object->pager == MEMORY_OBJECT_NULL) ||
12456 (object->internal) || (object->private)) {
12457 vm_object_unlock(object);
12458 vm_map_unlock(map);
12459 continue;
12460 }
12461 /*
12462 * keep reference on the object until syncing is done
12463 */
12464 vm_object_reference_locked(object);
12465 vm_object_unlock(object);
12466
12467 vm_map_unlock(map);
12468
12469 do_sync_req = vm_object_sync(object,
12470 offset,
12471 flush_size,
12472 sync_flags & VM_SYNC_INVALIDATE,
12473 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
12474 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
12475 sync_flags & VM_SYNC_SYNCHRONOUS);
12476 /*
12477 * only send an m_o_s if we returned pages or if the entry
12478 * is writable (i.e. dirty pages may have already been sent back)
12479 */
12480 if (!do_sync_req) {
12481 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
12482 /*
12483 * clear out the clustering and read-ahead hints
12484 */
12485 vm_object_lock(object);
12486
12487 object->pages_created = 0;
12488 object->pages_used = 0;
12489 object->sequential = 0;
12490 object->last_alloc = 0;
12491
12492 vm_object_unlock(object);
12493 }
12494 vm_object_deallocate(object);
12495 continue;
12496 }
12497 msync_req_alloc(new_msr);
12498
12499 vm_object_lock(object);
12500 offset += object->paging_offset;
12501
12502 new_msr->offset = offset;
12503 new_msr->length = flush_size;
12504 new_msr->object = object;
12505 new_msr->flag = VM_MSYNC_SYNCHRONIZING;
12506 re_iterate:
12507
12508 /*
12509 * We can't sync this object if there isn't a pager. The
12510 * pager can disappear anytime we're not holding the object
12511 * lock. So this has to be checked anytime we goto re_iterate.
12512 */
12513
12514 pager = object->pager;
12515
12516 if (pager == MEMORY_OBJECT_NULL) {
12517 vm_object_unlock(object);
12518 vm_object_deallocate(object);
12519 continue;
12520 }
12521
12522 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
12523 /*
12524 * need to check for overlapping entry, if found, wait
12525 * on overlapping msr to be done, then reiterate
12526 */
12527 msr_lock(msr);
12528 if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
12529 ((offset >= msr->offset &&
12530 offset < (msr->offset + msr->length)) ||
12531 (msr->offset >= offset &&
12532 msr->offset < (offset + flush_size))))
12533 {
12534 assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
12535 msr_unlock(msr);
12536 vm_object_unlock(object);
12537 thread_block(THREAD_CONTINUE_NULL);
12538 vm_object_lock(object);
12539 goto re_iterate;
12540 }
12541 msr_unlock(msr);
12542 }/* queue_iterate */
12543
12544 queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
12545
12546 vm_object_paging_begin(object);
12547 vm_object_unlock(object);
12548
12549 queue_enter(&req_q, new_msr, msync_req_t, req_q);
12550
12551 (void) memory_object_synchronize(
12552 pager,
12553 offset,
12554 flush_size,
12555 sync_flags & ~VM_SYNC_CONTIGUOUS);
12556
12557 vm_object_lock(object);
12558 vm_object_paging_end(object);
12559 vm_object_unlock(object);
12560 }/* while */
12561
12562 /*
12563 * wait for memory_object_synchronize_completed messages from pager(s)
12564 */
12565
12566 while (!queue_empty(&req_q)) {
12567 msr = (msync_req_t)queue_first(&req_q);
12568 msr_lock(msr);
12569 while(msr->flag != VM_MSYNC_DONE) {
12570 assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
12571 msr_unlock(msr);
12572 thread_block(THREAD_CONTINUE_NULL);
12573 msr_lock(msr);
12574 }/* while */
12575 queue_remove(&req_q, msr, msync_req_t, req_q);
12576 msr_unlock(msr);
12577 vm_object_deallocate(msr->object);
12578 msync_req_free(msr);
12579 }/* while */
12580
12581 /* for proper msync() behaviour */
12582 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
12583 return(KERN_INVALID_ADDRESS);
12584
12585 return(KERN_SUCCESS);
12586 }/* vm_msync */
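/*
 * Illustrative sketch (not part of this file, not compiled): a caller
 * that wants the documented "synchronous flush, fail on holes"
 * behaviour combines VM_SYNC_SYNCHRONOUS with VM_SYNC_CONTIGUOUS,
 * roughly the way BSD's msync() path is expected to build its flags.
 * The helper example_flush_range() is hypothetical.
 */
#if 0
static kern_return_t
example_flush_range(
	vm_map_t		map,
	vm_map_address_t	addr,
	vm_map_size_t		size,
	boolean_t		invalidate)
{
	vm_sync_t	sync_flags = VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS;

	if (invalidate)
		sync_flags |= VM_SYNC_INVALIDATE;

	/*
	 * KERN_INVALID_ADDRESS here means the range contained a hole;
	 * a POSIX msync() caller would typically surface that as ENOMEM.
	 */
	return vm_map_msync(map, addr, size, sync_flags);
}
#endif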
12587
12588 /*
12589 * Routine: convert_port_entry_to_map
12590 * Purpose:
12591 * Convert from a port specifying an entry or a task
12592 * to a map. Doesn't consume the port ref; produces a map ref,
12593 * which may be null. Unlike convert_port_to_map, the
12594 * port may be either task or named-entry backed.
12595 * Conditions:
12596 * Nothing locked.
12597 */
12598
12599
12600 vm_map_t
12601 convert_port_entry_to_map(
12602 ipc_port_t port)
12603 {
12604 vm_map_t map;
12605 vm_named_entry_t named_entry;
12606 uint32_t try_failed_count = 0;
12607
12608 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
12609 while(TRUE) {
12610 ip_lock(port);
12611 if(ip_active(port) && (ip_kotype(port)
12612 == IKOT_NAMED_ENTRY)) {
12613 named_entry =
12614 (vm_named_entry_t)port->ip_kobject;
12615 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
12616 ip_unlock(port);
12617
12618 try_failed_count++;
12619 mutex_pause(try_failed_count);
12620 continue;
12621 }
12622 named_entry->ref_count++;
12623 lck_mtx_unlock(&(named_entry)->Lock);
12624 ip_unlock(port);
12625 if ((named_entry->is_sub_map) &&
12626 (named_entry->protection
12627 & VM_PROT_WRITE)) {
12628 map = named_entry->backing.map;
12629 } else {
12630 mach_destroy_memory_entry(port);
12631 return VM_MAP_NULL;
12632 }
12633 vm_map_reference_swap(map);
12634 mach_destroy_memory_entry(port);
12635 break;
12636 }
12637 else
12638 return VM_MAP_NULL;
12639 }
12640 }
12641 else
12642 map = convert_port_to_map(port);
12643
12644 return map;
12645 }
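/*
 * Illustrative sketch (not part of this file, not compiled): a caller
 * holding a send right that may name either a task or a writable named
 * entry can resolve it with convert_port_entry_to_map(), and must drop
 * the map reference that routine produces. example_with_port_map() is
 * hypothetical.
 */
#if 0
static kern_return_t
example_with_port_map(
	ipc_port_t	port)
{
	vm_map_t	map;

	map = convert_port_entry_to_map(port);
	if (map == VM_MAP_NULL)
		return KERN_INVALID_ARGUMENT;

	/* ... operate on the map ... */

	vm_map_deallocate(map);	/* balance the reference produced above */
	return KERN_SUCCESS;
}
#endif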
12646
12647 /*
12648 * Routine: convert_port_entry_to_object
12649 * Purpose:
12650 * Convert from a port specifying a named entry to an
12651 * object. Doesn't consume the port ref; produces an object ref,
12652 * which may be null.
12653 * Conditions:
12654 * Nothing locked.
12655 */
12656
12657
12658 vm_object_t
12659 convert_port_entry_to_object(
12660 ipc_port_t port)
12661 {
12662 vm_object_t object;
12663 vm_named_entry_t named_entry;
12664 uint32_t try_failed_count = 0;
12665
12666 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
12667 while(TRUE) {
12668 ip_lock(port);
12669 if(ip_active(port) && (ip_kotype(port)
12670 == IKOT_NAMED_ENTRY)) {
12671 named_entry =
12672 (vm_named_entry_t)port->ip_kobject;
12673 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
12674 ip_unlock(port);
12675
12676 try_failed_count++;
12677 mutex_pause(try_failed_count);
12678 continue;
12679 }
12680 named_entry->ref_count++;
12681 lck_mtx_unlock(&(named_entry)->Lock);
12682 ip_unlock(port);
12683 if ((!named_entry->is_sub_map) &&
12684 (!named_entry->is_pager) &&
12685 (named_entry->protection
12686 & VM_PROT_WRITE)) {
12687 object = named_entry->backing.object;
12688 } else {
12689 mach_destroy_memory_entry(port);
12690 return (vm_object_t)NULL;
12691 }
12692 vm_object_reference(named_entry->backing.object);
12693 mach_destroy_memory_entry(port);
12694 break;
12695 }
12696 else
12697 return (vm_object_t)NULL;
12698 }
12699 } else {
12700 return (vm_object_t)NULL;
12701 }
12702
12703 return object;
12704 }
12705
12706 /*
12707 * Export routines to other components for the things we access locally through
12708 * macros.
12709 */
12710 #undef current_map
12711 vm_map_t
12712 current_map(void)
12713 {
12714 return (current_map_fast());
12715 }
12716
12717 /*
12718 * vm_map_reference:
12719 *
12720 * Most code internal to osfmk will go through a
12721 * macro defining this. This is always here for the
12722 * use of other kernel components.
12723 */
12724 #undef vm_map_reference
12725 void
12726 vm_map_reference(
12727 register vm_map_t map)
12728 {
12729 if (map == VM_MAP_NULL)
12730 return;
12731
12732 lck_mtx_lock(&map->s_lock);
12733 #if TASK_SWAPPER
12734 assert(map->res_count > 0);
12735 assert(map->ref_count >= map->res_count);
12736 map->res_count++;
12737 #endif
12738 map->ref_count++;
12739 lck_mtx_unlock(&map->s_lock);
12740 }
12741
12742 /*
12743 * vm_map_deallocate:
12744 *
12745 * Removes a reference from the specified map,
12746 * destroying it if no references remain.
12747 * The map should not be locked.
12748 */
12749 void
12750 vm_map_deallocate(
12751 register vm_map_t map)
12752 {
12753 unsigned int ref;
12754
12755 if (map == VM_MAP_NULL)
12756 return;
12757
12758 lck_mtx_lock(&map->s_lock);
12759 ref = --map->ref_count;
12760 if (ref > 0) {
12761 vm_map_res_deallocate(map);
12762 lck_mtx_unlock(&map->s_lock);
12763 return;
12764 }
12765 assert(map->ref_count == 0);
12766 lck_mtx_unlock(&map->s_lock);
12767
12768 #if TASK_SWAPPER
12769 /*
12770 * The map residence count isn't decremented here because
12771 * the vm_map_delete below will traverse the entire map,
12772 * deleting entries, and the residence counts on objects
12773 * and sharing maps will go away then.
12774 */
12775 #endif
12776
12777 vm_map_destroy(map, VM_MAP_NO_FLAGS);
12778 }
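/*
 * Illustrative sketch (not part of this file, not compiled): the usual
 * pairing of vm_map_reference() and vm_map_deallocate() when a
 * component wants to keep using a map after the context that made the
 * map pointer safe to read has gone away. example_use_map() is
 * hypothetical.
 */
#if 0
static void
example_use_map(
	vm_map_t	map)
{
	/* take our own reference while the map is known to be alive */
	vm_map_reference(map);

	/* ... work with the map ... */

	/* drop our reference; the map is destroyed if this was the last one */
	vm_map_deallocate(map);
}
#endif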
12779
12780
12781 void
12782 vm_map_disable_NX(vm_map_t map)
12783 {
12784 if (map == NULL)
12785 return;
12786 if (map->pmap == NULL)
12787 return;
12788
12789 pmap_disable_NX(map->pmap);
12790 }
12791
12792 void
12793 vm_map_disallow_data_exec(vm_map_t map)
12794 {
12795 if (map == NULL)
12796 return;
12797
12798 map->map_disallow_data_exec = TRUE;
12799 }
12800
12801 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
12802 * more descriptive.
12803 */
12804 void
12805 vm_map_set_32bit(vm_map_t map)
12806 {
12807 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
12808 }
12809
12810
12811 void
12812 vm_map_set_64bit(vm_map_t map)
12813 {
12814 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
12815 }
12816
12817 vm_map_offset_t
12818 vm_compute_max_offset(unsigned is64)
12819 {
12820 return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
12821 }
12822
12823 boolean_t
12824 vm_map_is_64bit(
12825 vm_map_t map)
12826 {
12827 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
12828 }
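/*
 * Illustrative sketch (not part of this file, not compiled): task
 * creation picks the map ceiling from the target ABI, which is all
 * vm_map_set_32bit()/vm_map_set_64bit() do. example_size_map_for_abi()
 * is hypothetical.
 */
#if 0
static void
example_size_map_for_abi(
	vm_map_t	map,
	boolean_t	is_64bit)
{
	if (is_64bit)
		vm_map_set_64bit(map);	/* max_offset = MACH_VM_MAX_ADDRESS */
	else
		vm_map_set_32bit(map);	/* max_offset = VM_MAX_ADDRESS */

	assert(vm_map_is_64bit(map) == (is_64bit ? TRUE : FALSE));
}
#endif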
12829
12830 boolean_t
12831 vm_map_has_4GB_pagezero(
12832 vm_map_t map)
12833 {
12834 /*
12835 * XXX FBDP
12836 * We should lock the VM map (for read) here but we can get away
12837 * with it for now because there can't really be any race condition:
12838 * the VM map's min_offset is changed only when the VM map is created
12839 * and when the zero page is established (when the binary gets loaded),
12840 * and this routine gets called only when the task terminates and the
12841 * VM map is being torn down, and when a new map is created via
12842 * load_machfile()/execve().
12843 */
12844 return (map->min_offset >= 0x100000000ULL);
12845 }
12846
12847 void
12848 vm_map_set_4GB_pagezero(vm_map_t map)
12849 {
12850 #if defined(__i386__)
12851 pmap_set_4GB_pagezero(map->pmap);
12852 #else
12853 #pragma unused(map)
12854 #endif
12855
12856 }
12857
12858 void
12859 vm_map_clear_4GB_pagezero(vm_map_t map)
12860 {
12861 #if defined(__i386__)
12862 pmap_clear_4GB_pagezero(map->pmap);
12863 #else
12864 #pragma unused(map)
12865 #endif
12866 }
12867
12868 /*
12869 * Raise a VM map's minimum offset.
12870 * To strictly enforce "page zero" reservation.
12871 */
12872 kern_return_t
12873 vm_map_raise_min_offset(
12874 vm_map_t map,
12875 vm_map_offset_t new_min_offset)
12876 {
12877 vm_map_entry_t first_entry;
12878
12879 new_min_offset = vm_map_round_page(new_min_offset);
12880
12881 vm_map_lock(map);
12882
12883 if (new_min_offset < map->min_offset) {
12884 /*
12885 * Can't move min_offset backwards, as that would expose
12886 * a part of the address space that was previously, and for
12887 * possibly good reasons, inaccessible.
12888 */
12889 vm_map_unlock(map);
12890 return KERN_INVALID_ADDRESS;
12891 }
12892
12893 first_entry = vm_map_first_entry(map);
12894 if (first_entry != vm_map_to_entry(map) &&
12895 first_entry->vme_start < new_min_offset) {
12896 /*
12897 * Some memory was already allocated below the new
12898 * minimum offset. It's too late to change it now...
12899 */
12900 vm_map_unlock(map);
12901 return KERN_NO_SPACE;
12902 }
12903
12904 map->min_offset = new_min_offset;
12905
12906 vm_map_unlock(map);
12907
12908 return KERN_SUCCESS;
12909 }
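/*
 * Illustrative sketch (not part of this file, not compiled): reserving
 * a conventional one-page "page zero" by raising the map's minimum
 * offset before anything is mapped, as an exec-time caller might do.
 * The size used here is illustrative; real callers pass the pagezero
 * size chosen for the binary being loaded.
 */
#if 0
static kern_return_t
example_reserve_pagezero(
	vm_map_t	map)
{
	/* fails with KERN_NO_SPACE if something is already mapped below it */
	return vm_map_raise_min_offset(map, (vm_map_offset_t) PAGE_SIZE);
}
#endif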
12910
12911 /*
12912 * Set the limit on the maximum amount of user wired memory allowed for this map.
12913 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
12914 * the kernel. The limit is checked on the Mach VM side, so we keep a copy here so we
12915 * don't have to reach over to the BSD data structures.
12916 */
12917
12918 void
12919 vm_map_set_user_wire_limit(vm_map_t map,
12920 vm_size_t limit)
12921 {
12922 map->user_wire_limit = limit;
12923 }
12924
12925
12926 void vm_map_switch_protect(vm_map_t map,
12927 boolean_t val)
12928 {
12929 vm_map_lock(map);
12930 map->switch_protect=val;
12931 vm_map_unlock(map);
12932 }
12933
12934 /* Add (generate) code signature for memory range */
12935 #if CONFIG_DYNAMIC_CODE_SIGNING
12936 kern_return_t vm_map_sign(vm_map_t map,
12937 vm_map_offset_t start,
12938 vm_map_offset_t end)
12939 {
12940 vm_map_entry_t entry;
12941 vm_page_t m;
12942 vm_object_t object;
12943
12944 /*
12945 * Vet all the input parameters and current type and state of the
12946 * underlying object. Return with an error if anything is amiss.
12947 */
12948 if (map == VM_MAP_NULL)
12949 return(KERN_INVALID_ARGUMENT);
12950
12951 vm_map_lock_read(map);
12952
12953 if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
12954 /*
12955 * Must pass a valid non-submap address.
12956 */
12957 vm_map_unlock_read(map);
12958 return(KERN_INVALID_ADDRESS);
12959 }
12960
12961 if((entry->vme_start > start) || (entry->vme_end < end)) {
12962 /*
12963 * Map entry doesn't cover the requested range. Not handling
12964 * this situation currently.
12965 */
12966 vm_map_unlock_read(map);
12967 return(KERN_INVALID_ARGUMENT);
12968 }
12969
12970 object = entry->object.vm_object;
12971 if (object == VM_OBJECT_NULL) {
12972 /*
12973 * Object must already be present or we can't sign.
12974 */
12975 vm_map_unlock_read(map);
12976 return KERN_INVALID_ARGUMENT;
12977 }
12978
12979 vm_object_lock(object);
12980 vm_map_unlock_read(map);
12981
12982 while(start < end) {
12983 uint32_t refmod;
12984
12985 m = vm_page_lookup(object, start - entry->vme_start + entry->offset );
12986 if (m==VM_PAGE_NULL) {
12987 /* should we try to fault a page here? We can probably
12988 * demand it exists and is locked for this request */
12989 vm_object_unlock(object);
12990 return KERN_FAILURE;
12991 }
12992 /* deal with special page status */
12993 if (m->busy ||
12994 (m->unusual && (m->error || m->restart || m->private || m->absent))) {
12995 vm_object_unlock(object);
12996 return KERN_FAILURE;
12997 }
12998
12999 /* Page is OK... now "validate" it */
13000 /* This is the place where we'll call out to create a code
13001 * directory, later */
13002 m->cs_validated = TRUE;
13003
13004 /* The page is now "clean" for codesigning purposes. That means
13005 * we don't consider it as modified (wpmapped) anymore. But
13006 * we'll disconnect the page so we note any future modification
13007 * attempts. */
13008 m->wpmapped = FALSE;
13009 refmod = pmap_disconnect(m->phys_page);
13010
13011 /* Pull the dirty status from the pmap, since we cleared the
13012 * wpmapped bit */
13013 if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
13014 m->dirty = TRUE;
13015 }
13016
13017 /* On to the next page */
13018 start += PAGE_SIZE;
13019 }
13020 vm_object_unlock(object);
13021
13022 return KERN_SUCCESS;
13023 }
13024 #endif
13025
13026 #if CONFIG_FREEZE
13027
13028 kern_return_t vm_map_freeze_walk(
13029 vm_map_t map,
13030 unsigned int *purgeable_count,
13031 unsigned int *wired_count,
13032 unsigned int *clean_count,
13033 unsigned int *dirty_count,
13034 boolean_t *has_shared)
13035 {
13036 vm_map_entry_t entry;
13037
13038 vm_map_lock_read(map);
13039
13040 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
13041 *has_shared = FALSE;
13042
13043 for (entry = vm_map_first_entry(map);
13044 entry != vm_map_to_entry(map);
13045 entry = entry->vme_next) {
13046 unsigned int purgeable, clean, dirty, wired;
13047 boolean_t shared;
13048
13049 if ((entry->object.vm_object == 0) ||
13050 (entry->is_sub_map) ||
13051 (entry->object.vm_object->phys_contiguous)) {
13052 continue;
13053 }
13054
13055 vm_object_pack(&purgeable, &wired, &clean, &dirty, &shared, entry->object.vm_object, VM_OBJECT_NULL, NULL, NULL);
13056
13057 *purgeable_count += purgeable;
13058 *wired_count += wired;
13059 *clean_count += clean;
13060 *dirty_count += dirty;
13061
13062 if (shared) {
13063 *has_shared = TRUE;
13064 }
13065 }
13066
13067 vm_map_unlock_read(map);
13068
13069 return KERN_SUCCESS;
13070 }
13071
13072 kern_return_t vm_map_freeze(
13073 vm_map_t map,
13074 unsigned int *purgeable_count,
13075 unsigned int *wired_count,
13076 unsigned int *clean_count,
13077 unsigned int *dirty_count,
13078 boolean_t *has_shared)
13079 {
13080 vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
13081 vm_object_t compact_object = VM_OBJECT_NULL;
13082 vm_object_offset_t offset = 0x0;
13083 kern_return_t kr = KERN_SUCCESS;
13084 void *default_freezer_toc = NULL;
13085 boolean_t cleanup = FALSE;
13086
13087 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
13088 *has_shared = FALSE;
13089
13090 /* Create our compact object */
13091 compact_object = vm_object_allocate((vm_map_offset_t)(VM_MAX_ADDRESS) - (vm_map_offset_t)(VM_MIN_ADDRESS));
13092 if (!compact_object) {
13093 kr = KERN_FAILURE;
13094 goto done;
13095 }
13096
13097 default_freezer_toc = default_freezer_mapping_create(compact_object, offset);
13098 if (!default_freezer_toc) {
13099 kr = KERN_FAILURE;
13100 goto done;
13101 }
13102
13103 /*
13104 * We need the exclusive lock here so that we can
13105 * block any page faults or lookups while we are
13106 * in the middle of freezing this vm map.
13107 */
13108 vm_map_lock(map);
13109
13110 if (map->default_freezer_toc != NULL){
13111 /*
13112 * This map has already been frozen.
13113 */
13114 cleanup = TRUE;
13115 kr = KERN_SUCCESS;
13116 goto done;
13117 }
13118
13119 /* Get a mapping in place for the freezing about to commence */
13120 map->default_freezer_toc = default_freezer_toc;
13121
13122 vm_object_lock(compact_object);
13123
13124 for (entry2 = vm_map_first_entry(map);
13125 entry2 != vm_map_to_entry(map);
13126 entry2 = entry2->vme_next) {
13127
13128 vm_object_t src_object = entry2->object.vm_object;
13129
13130 /* If eligible, scan the entry, moving eligible pages over to the compact object */
13131 if (entry2->object.vm_object && !entry2->is_sub_map && !entry2->object.vm_object->phys_contiguous) {
13132 unsigned int purgeable, clean, dirty, wired;
13133 boolean_t shared;
13134
13135 vm_object_pack(&purgeable, &wired, &clean, &dirty, &shared,
13136 src_object, compact_object, &default_freezer_toc, &offset);
13137
13138 *purgeable_count += purgeable;
13139 *wired_count += wired;
13140 *clean_count += clean;
13141 *dirty_count += dirty;
13142
13143 if (shared) {
13144 *has_shared = TRUE;
13145 }
13146 }
13147 }
13148
13149 vm_object_unlock(compact_object);
13150
13151 /* Finally, throw out the pages to swap */
13152 vm_object_pageout(compact_object);
13153
13154 done:
13155 vm_map_unlock(map);
13156
13157 /* Unwind if there was a failure */
13158 if ((cleanup) || (KERN_SUCCESS != kr)) {
13159 if (default_freezer_toc){
13160 default_freezer_mapping_free(&map->default_freezer_toc, TRUE);
13161 }
13162 if (compact_object){
13163 vm_object_deallocate(compact_object);
13164 }
13165 }
13166
13167 return kr;
13168 }
13169
13170 __private_extern__ vm_object_t default_freezer_get_compact_vm_object( void** );
13171
13172 void
13173 vm_map_thaw(
13174 vm_map_t map)
13175 {
13176 void **default_freezer_toc;
13177 vm_object_t compact_object;
13178
13179 vm_map_lock(map);
13180
13181 if (map->default_freezer_toc == NULL){
13182 /*
13183 * This map is not in a frozen state.
13184 */
13185 goto out;
13186 }
13187
13188 default_freezer_toc = &(map->default_freezer_toc);
13189
13190 compact_object = default_freezer_get_compact_vm_object(default_freezer_toc);
13191
13192 /* Bring the pages back in */
13193 vm_object_pagein(compact_object);
13194
13195 /* Shift pages back to their original objects */
13196 vm_object_unpack(compact_object, default_freezer_toc);
13197
13198 vm_object_deallocate(compact_object);
13199
13200 map->default_freezer_toc = NULL;
13201
13202 out:
13203 vm_map_unlock(map);
13204 }
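/*
 * Illustrative sketch (not part of this file, not compiled): the
 * expected freeze cycle for a task's map when CONFIG_FREEZE is enabled.
 * A caller can first walk the map to estimate how much is dirty or
 * shared, then freeze it, and later call vm_map_thaw() before the task
 * runs again. example_freeze_task_map() and its policy check are
 * hypothetical; real thresholds live in the caller.
 */
#if 0
static kern_return_t
example_freeze_task_map(
	vm_map_t	map)
{
	unsigned int	purgeable, wired, clean, dirty;
	boolean_t	shared;
	kern_return_t	kr;

	kr = vm_map_freeze_walk(map, &purgeable, &wired,
				&clean, &dirty, &shared);
	if (kr != KERN_SUCCESS)
		return kr;

	if (shared || dirty == 0)
		return KERN_SUCCESS;	/* hypothetical policy: nothing worth freezing */

	return vm_map_freeze(map, &purgeable, &wired,
			     &clean, &dirty, &shared);
}
#endif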
13205 #endif