1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66 #include <task_swapper.h>
67 #include <mach_assert.h>
68 #include <libkern/OSAtomic.h>
69
70 #include <mach/kern_return.h>
71 #include <mach/port.h>
72 #include <mach/vm_attributes.h>
73 #include <mach/vm_param.h>
74 #include <mach/vm_behavior.h>
75 #include <mach/vm_statistics.h>
76 #include <mach/memory_object.h>
77 #include <mach/mach_vm.h>
78 #include <machine/cpu_capabilities.h>
79 #include <mach/sdt.h>
80
81 #include <kern/assert.h>
82 #include <kern/counters.h>
83 #include <kern/kalloc.h>
84 #include <kern/zalloc.h>
85
86 #include <vm/cpm.h>
87 #include <vm/vm_init.h>
88 #include <vm/vm_fault.h>
89 #include <vm/vm_map.h>
90 #include <vm/vm_object.h>
91 #include <vm/vm_page.h>
92 #include <vm/vm_pageout.h>
93 #include <vm/vm_kern.h>
94 #include <ipc/ipc_port.h>
95 #include <kern/sched_prim.h>
96 #include <kern/misc_protos.h>
97 #include <machine/db_machdep.h>
98 #include <kern/xpr.h>
99
100 #include <mach/vm_map_server.h>
101 #include <mach/mach_host_server.h>
102 #include <vm/vm_protos.h>
103 #include <vm/vm_purgeable_internal.h>
104
106 #include <vm/vm_shared_region.h>
107 #include <vm/vm_map_store.h>
108
109 /* Internal prototypes
110 */
111
112 static void vm_map_simplify_range(
113 vm_map_t map,
114 vm_map_offset_t start,
115 vm_map_offset_t end); /* forward */
116
117 static boolean_t vm_map_range_check(
118 vm_map_t map,
119 vm_map_offset_t start,
120 vm_map_offset_t end,
121 vm_map_entry_t *entry);
122
123 static vm_map_entry_t _vm_map_entry_create(
124 struct vm_map_header *map_header);
125
126 static void _vm_map_entry_dispose(
127 struct vm_map_header *map_header,
128 vm_map_entry_t entry);
129
130 static void vm_map_pmap_enter(
131 vm_map_t map,
132 vm_map_offset_t addr,
133 vm_map_offset_t end_addr,
134 vm_object_t object,
135 vm_object_offset_t offset,
136 vm_prot_t protection);
137
138 static void _vm_map_clip_end(
139 struct vm_map_header *map_header,
140 vm_map_entry_t entry,
141 vm_map_offset_t end);
142
143 static void _vm_map_clip_start(
144 struct vm_map_header *map_header,
145 vm_map_entry_t entry,
146 vm_map_offset_t start);
147
148 static void vm_map_entry_delete(
149 vm_map_t map,
150 vm_map_entry_t entry);
151
152 static kern_return_t vm_map_delete(
153 vm_map_t map,
154 vm_map_offset_t start,
155 vm_map_offset_t end,
156 int flags,
157 vm_map_t zap_map);
158
159 static kern_return_t vm_map_copy_overwrite_unaligned(
160 vm_map_t dst_map,
161 vm_map_entry_t entry,
162 vm_map_copy_t copy,
163 vm_map_address_t start);
164
165 static kern_return_t vm_map_copy_overwrite_aligned(
166 vm_map_t dst_map,
167 vm_map_entry_t tmp_entry,
168 vm_map_copy_t copy,
169 vm_map_offset_t start,
170 pmap_t pmap);
171
172 static kern_return_t vm_map_copyin_kernel_buffer(
173 vm_map_t src_map,
174 vm_map_address_t src_addr,
175 vm_map_size_t len,
176 boolean_t src_destroy,
177 vm_map_copy_t *copy_result); /* OUT */
178
179 static kern_return_t vm_map_copyout_kernel_buffer(
180 vm_map_t map,
181 vm_map_address_t *addr, /* IN/OUT */
182 vm_map_copy_t copy,
183 boolean_t overwrite);
184
185 static void vm_map_fork_share(
186 vm_map_t old_map,
187 vm_map_entry_t old_entry,
188 vm_map_t new_map);
189
190 static boolean_t vm_map_fork_copy(
191 vm_map_t old_map,
192 vm_map_entry_t *old_entry_p,
193 vm_map_t new_map);
194
195 void vm_map_region_top_walk(
196 vm_map_entry_t entry,
197 vm_region_top_info_t top);
198
199 void vm_map_region_walk(
200 vm_map_t map,
201 vm_map_offset_t va,
202 vm_map_entry_t entry,
203 vm_object_offset_t offset,
204 vm_object_size_t range,
205 vm_region_extended_info_t extended,
206 boolean_t look_for_pages);
207
208 static kern_return_t vm_map_wire_nested(
209 vm_map_t map,
210 vm_map_offset_t start,
211 vm_map_offset_t end,
212 vm_prot_t access_type,
213 boolean_t user_wire,
214 pmap_t map_pmap,
215 vm_map_offset_t pmap_addr);
216
217 static kern_return_t vm_map_unwire_nested(
218 vm_map_t map,
219 vm_map_offset_t start,
220 vm_map_offset_t end,
221 boolean_t user_wire,
222 pmap_t map_pmap,
223 vm_map_offset_t pmap_addr);
224
225 static kern_return_t vm_map_overwrite_submap_recurse(
226 vm_map_t dst_map,
227 vm_map_offset_t dst_addr,
228 vm_map_size_t dst_size);
229
230 static kern_return_t vm_map_copy_overwrite_nested(
231 vm_map_t dst_map,
232 vm_map_offset_t dst_addr,
233 vm_map_copy_t copy,
234 boolean_t interruptible,
235 pmap_t pmap,
236 boolean_t discard_on_success);
237
238 static kern_return_t vm_map_remap_extract(
239 vm_map_t map,
240 vm_map_offset_t addr,
241 vm_map_size_t size,
242 boolean_t copy,
243 struct vm_map_header *map_header,
244 vm_prot_t *cur_protection,
245 vm_prot_t *max_protection,
246 vm_inherit_t inheritance,
247 boolean_t pageable);
248
249 static kern_return_t vm_map_remap_range_allocate(
250 vm_map_t map,
251 vm_map_address_t *address,
252 vm_map_size_t size,
253 vm_map_offset_t mask,
254 int flags,
255 vm_map_entry_t *map_entry);
256
257 static void vm_map_region_look_for_page(
258 vm_map_t map,
259 vm_map_offset_t va,
260 vm_object_t object,
261 vm_object_offset_t offset,
262 int max_refcnt,
263 int depth,
264 vm_region_extended_info_t extended);
265
266 static int vm_map_region_count_obj_refs(
267 vm_map_entry_t entry,
268 vm_object_t object);
269
270
271 static kern_return_t vm_map_willneed(
272 vm_map_t map,
273 vm_map_offset_t start,
274 vm_map_offset_t end);
275
276 static kern_return_t vm_map_reuse_pages(
277 vm_map_t map,
278 vm_map_offset_t start,
279 vm_map_offset_t end);
280
281 static kern_return_t vm_map_reusable_pages(
282 vm_map_t map,
283 vm_map_offset_t start,
284 vm_map_offset_t end);
285
286 static kern_return_t vm_map_can_reuse(
287 vm_map_t map,
288 vm_map_offset_t start,
289 vm_map_offset_t end);
290
291 #if CONFIG_FREEZE
292 struct default_freezer_table;
293 __private_extern__ void* default_freezer_mapping_create(vm_object_t, vm_offset_t);
294 __private_extern__ void default_freezer_mapping_free(void**, boolean_t all);
295 #endif
296
297 /*
298 * Macros to copy a vm_map_entry. We must be careful to correctly
299 * manage the wired page count. vm_map_entry_copy() creates a new
300 * map entry to the same memory - the wired count in the new entry
301 * must be set to zero. vm_map_entry_copy_full() creates a new
302 * entry that is identical to the old entry. This preserves the
303 * wire count; it's used for map splitting and zone changing in
304 * vm_map_copyout.
305 */
306 #define vm_map_entry_copy(NEW,OLD) \
307 MACRO_BEGIN \
308 *(NEW) = *(OLD); \
309 (NEW)->is_shared = FALSE; \
310 (NEW)->needs_wakeup = FALSE; \
311 (NEW)->in_transition = FALSE; \
312 (NEW)->wired_count = 0; \
313 (NEW)->user_wired_count = 0; \
314 (NEW)->permanent = FALSE; \
315 MACRO_END
316
317 #define vm_map_entry_copy_full(NEW,OLD) (*(NEW) = *(OLD))
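/*
 * A minimal sketch of how the two flavors are used (the map write lock
 * is assumed held; "map_header", "where", "old_entry", "new_map" and
 * "new_entry" are placeholder names, simplified from the clip and fork
 * paths in this file).  Splitting an entry must keep the wired counts
 * in both halves, so it uses the full copy; mapping the same memory
 * into another map must start out unwired, so it uses the zeroing copy.
 */
#if 0
	vm_map_entry_t	left;

	/* split "entry" at "where": both halves keep their wired counts */
	left = _vm_map_entry_create(map_header);
	vm_map_entry_copy_full(left, entry);
	left->vme_end = where;
	entry->offset += (where - entry->vme_start);
	entry->vme_start = where;
	_vm_map_store_entry_link(map_header, entry->vme_prev, left);

	/* share old_entry's memory in a new map: the new entry is unwired */
	new_entry = vm_map_entry_create(new_map);
	vm_map_entry_copy(new_entry, old_entry);
	vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
#endif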
318
319 /*
320 * Decide if we want to allow processes to execute from their data or stack areas.
321 * override_nx() returns true if we do. Data/stack execution can be enabled independently
322 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
323 * or allow_stack_exec to enable data execution for that type of data area for that particular
324 * ABI (or both by or'ing the flags together). These are initialized in the architecture
325 * specific pmap files since the default behavior varies according to architecture. The
326 * main reason it varies is because of the need to provide binary compatibility with old
327 * applications that were written before these restrictions came into being. In the old
328 * days, an app could execute anything it could read, but this has slowly been tightened
329 * up over time. The default behavior is:
330 *
331 * 32-bit PPC apps may execute from both stack and data areas
332 * 32-bit Intel apps may execute from data areas but not stack
333 * 64-bit PPC/Intel apps may not execute from either data or stack
334 *
335 * An application on any architecture may override these defaults by explicitly
336 * adding PROT_EXEC permission to the page in question with the mprotect(2)
337 * system call. This code here just determines what happens when an app tries to
338 * execute from a page that lacks execute permission.
339 *
340 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
341 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
342 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
343 * execution from data areas for a particular binary even if the arch normally permits it. As
344 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
345 * to support some complicated use cases, notably browsers with out-of-process plugins that
346 * are not all NX-safe.
347 */
348
349 extern int allow_data_exec, allow_stack_exec;
350
351 int
352 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
353 {
354 int current_abi;
355
356 /*
357 * Determine if the app is running in 32 or 64 bit mode.
358 */
359
360 if (vm_map_is_64bit(map))
361 current_abi = VM_ABI_64;
362 else
363 current_abi = VM_ABI_32;
364
365 /*
366 * Determine if we should allow the execution based on whether it's a
367 * stack or data area and the current architecture.
368 */
369
370 if (user_tag == VM_MEMORY_STACK)
371 return allow_stack_exec & current_abi;
372
373 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
374 }
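/*
 * A hypothetical caller-side sketch (not the actual fault path; the
 * helper name and arguments are placeholders): a page lacking execute
 * permission is allowed to execute only if the policy above says so.
 */
#if 0
static boolean_t
example_exec_permitted(vm_map_t map, vm_map_entry_t entry, vm_prot_t prot)
{
	if (prot & VM_PROT_EXECUTE)
		return TRUE;		/* page is already executable */
	/* data/stack page: defer to the per-ABI policy and map setting */
	return override_nx(map, entry->alias) ? TRUE : FALSE;
}
#endif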
375
376
377 /*
378 * Virtual memory maps provide for the mapping, protection,
379 * and sharing of virtual memory objects. In addition,
380 * this module provides for an efficient virtual copy of
381 * memory from one map to another.
382 *
383 * Synchronization is required prior to most operations.
384 *
385 * Maps consist of an ordered doubly-linked list of simple
386 * entries; a single hint is used to speed up lookups.
387 *
388 * Sharing maps have been deleted from this version of Mach.
389 * All shared objects are now mapped directly into the respective
390 * maps. This requires a change in the copy on write strategy;
391 * the asymmetric (delayed) strategy is used for shared temporary
392 * objects instead of the symmetric (shadow) strategy. All maps
393 * are now "top level" maps (either task map, kernel map or submap
394 * of the kernel map).
395 *
396 * Since portions of maps are specified by start/end addresses,
397 * which may not align with existing map entries, all
398 * routines merely "clip" entries to these start/end values.
399 * [That is, an entry is split into two, bordering at a
400 * start or end value.] Note that these clippings may not
401 * always be necessary (as the two resulting entries are then
402 * not changed); however, the clipping is done for convenience.
403 * No attempt is currently made to "glue back together" two
404 * abutting entries.
405 *
406 * The symmetric (shadow) copy strategy implements virtual copy
407 * by copying VM object references from one map to
408 * another, and then marking both regions as copy-on-write.
409 * It is important to note that only one writeable reference
410 * to a VM object region exists in any map when this strategy
411 * is used -- this means that shadow object creation can be
412 * delayed until a write operation occurs. The asymmetric (delayed)
413 * strategy allows multiple maps to have writeable references to
414 * the same region of a vm object, and hence cannot delay creating
415 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
416 * Copying of permanent objects is completely different; see
417 * vm_object_copy_strategically() in vm_object.c.
418 */
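/*
 * The ordered entry list is walked with a sentinel; a minimal sketch of
 * the idiom used throughout this file (the map lock is assumed held):
 */
#if 0
	vm_map_entry_t	entry;

	for (entry = vm_map_first_entry(map);
	     entry != vm_map_to_entry(map);
	     entry = entry->vme_next) {
		/* [entry->vme_start, entry->vme_end) is one mapped range */
	}
#endif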
419
420 static zone_t vm_map_zone; /* zone for vm_map structures */
421 static zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
422 static zone_t vm_map_kentry_zone; /* zone for kernel entry structures */
423 static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
424
425
426 /*
427 * Placeholder object for submap operations. This object is dropped
428 * into the range by a call to vm_map_find, and removed when
429 * vm_map_submap creates the submap.
430 */
431
432 vm_object_t vm_submap_object;
433
434 static void *map_data;
435 static vm_size_t map_data_size;
436 static void *kentry_data;
437 static vm_size_t kentry_data_size;
438 static int kentry_count = 2048; /* to init kentry_data_size */
439
440 #if CONFIG_EMBEDDED
441 #define NO_COALESCE_LIMIT 0
442 #else
443 #define NO_COALESCE_LIMIT ((1024 * 128) - 1)
444 #endif
445
446 /* Skip acquiring locks if we're in the midst of a kernel core dump */
447 unsigned int not_in_kdp = 1;
448
449 unsigned int vm_map_set_cache_attr_count = 0;
450
451 kern_return_t
452 vm_map_set_cache_attr(
453 vm_map_t map,
454 vm_map_offset_t va)
455 {
456 vm_map_entry_t map_entry;
457 vm_object_t object;
458 kern_return_t kr = KERN_SUCCESS;
459
460 vm_map_lock_read(map);
461
462 if (!vm_map_lookup_entry(map, va, &map_entry) ||
463 map_entry->is_sub_map) {
464 /*
465 * that memory is not properly mapped
466 */
467 kr = KERN_INVALID_ARGUMENT;
468 goto done;
469 }
470 object = map_entry->object.vm_object;
471
472 if (object == VM_OBJECT_NULL) {
473 /*
474 * there should be a VM object here at this point
475 */
476 kr = KERN_INVALID_ARGUMENT;
477 goto done;
478 }
479 vm_object_lock(object);
480 object->set_cache_attr = TRUE;
481 vm_object_unlock(object);
482
483 vm_map_set_cache_attr_count++;
484 done:
485 vm_map_unlock_read(map);
486
487 return kr;
488 }
489
490
491 #if CONFIG_CODE_DECRYPTION
492 /*
493 * vm_map_apple_protected:
494 * This remaps the requested part of the object with an object backed by
495 * the decrypting pager.
496 * crypt_info contains entry points and session data for the crypt module.
497 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
498 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
499 */
500 kern_return_t
501 vm_map_apple_protected(
502 vm_map_t map,
503 vm_map_offset_t start,
504 vm_map_offset_t end,
505 struct pager_crypt_info *crypt_info)
506 {
507 boolean_t map_locked;
508 kern_return_t kr;
509 vm_map_entry_t map_entry;
510 memory_object_t protected_mem_obj;
511 vm_object_t protected_object;
512 vm_map_offset_t map_addr;
513
514 vm_map_lock_read(map);
515 map_locked = TRUE;
516
517 /* lookup the protected VM object */
518 if (!vm_map_lookup_entry(map,
519 start,
520 &map_entry) ||
521 map_entry->vme_end < end ||
522 map_entry->is_sub_map) {
523 /* that memory is not properly mapped */
524 kr = KERN_INVALID_ARGUMENT;
525 goto done;
526 }
527 protected_object = map_entry->object.vm_object;
528 if (protected_object == VM_OBJECT_NULL) {
529 /* there should be a VM object here at this point */
530 kr = KERN_INVALID_ARGUMENT;
531 goto done;
532 }
533
534 /* make sure protected object stays alive while map is unlocked */
535 vm_object_reference(protected_object);
536
537 vm_map_unlock_read(map);
538 map_locked = FALSE;
539
540 /*
541 * Lookup (and create if necessary) the protected memory object
542 * matching that VM object.
543 * If successful, this also grabs a reference on the memory object,
544 * to guarantee that it doesn't go away before we get a chance to map
545 * it.
546 */
547 protected_mem_obj = apple_protect_pager_setup(protected_object, crypt_info);
548
549 /* release extra ref on protected object */
550 vm_object_deallocate(protected_object);
551
552 if (protected_mem_obj == NULL) {
553 kr = KERN_FAILURE;
554 goto done;
555 }
556
557 /* map this memory object in place of the current one */
558 map_addr = start;
559 kr = vm_map_enter_mem_object(map,
560 &map_addr,
561 end - start,
562 (mach_vm_offset_t) 0,
563 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
564 (ipc_port_t) protected_mem_obj,
565 (map_entry->offset +
566 (start - map_entry->vme_start)),
567 TRUE,
568 map_entry->protection,
569 map_entry->max_protection,
570 map_entry->inheritance);
571 assert(map_addr == start);
572 /*
573 * Release the reference obtained by apple_protect_pager_setup().
574 * The mapping (if it succeeded) is now holding a reference on the
575 * memory object.
576 */
577 memory_object_deallocate(protected_mem_obj);
578
579 done:
580 if (map_locked) {
581 vm_map_unlock_read(map);
582 }
583 return kr;
584 }
585 #endif /* CONFIG_CODE_DECRYPTION */
586
587
588 lck_grp_t vm_map_lck_grp;
589 lck_grp_attr_t vm_map_lck_grp_attr;
590 lck_attr_t vm_map_lck_attr;
591
592
593 /*
594 * vm_map_init:
595 *
596 * Initialize the vm_map module. Must be called before
597 * any other vm_map routines.
598 *
599 * Map and entry structures are allocated from zones -- we must
600 * initialize those zones.
601 *
602 * There are three zones of interest:
603 *
604 * vm_map_zone: used to allocate maps.
605 * vm_map_entry_zone: used to allocate map entries.
606 * vm_map_kentry_zone: used to allocate map entries for the kernel.
607 *
608 * The kernel allocates map entries from a special zone that is initially
609 * "crammed" with memory. It would be difficult (perhaps impossible) for
610 * the kernel to allocate more memory to an entry zone when it became
611 * empty since the very act of allocating memory implies the creation
612 * of a new entry.
613 */
614 void
615 vm_map_init(
616 void)
617 {
618 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
619 PAGE_SIZE, "maps");
620 zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
621
622 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
623 1024*1024, PAGE_SIZE*5,
624 "non-kernel map entries");
625 zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
626
627 vm_map_kentry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
628 kentry_data_size, kentry_data_size,
629 "kernel map entries");
630 zone_change(vm_map_kentry_zone, Z_NOENCRYPT, TRUE);
631
632 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
633 16*1024, PAGE_SIZE, "map copies");
634 zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
635
636 /*
637 * Cram the map and kentry zones with initial data.
638 * Set kentry_zone non-collectible to aid zone_gc().
639 */
640 zone_change(vm_map_zone, Z_COLLECT, FALSE);
641 zone_change(vm_map_kentry_zone, Z_COLLECT, FALSE);
642 zone_change(vm_map_kentry_zone, Z_EXPAND, FALSE);
643 zone_change(vm_map_kentry_zone, Z_FOREIGN, TRUE);
644 zone_change(vm_map_kentry_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
645 zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
646
647 zcram(vm_map_zone, map_data, map_data_size);
648 zcram(vm_map_kentry_zone, kentry_data, kentry_data_size);
649
650 lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
651 lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
652 lck_attr_setdefault(&vm_map_lck_attr);
653 }
654
655 void
656 vm_map_steal_memory(
657 void)
658 {
659 map_data_size = round_page(10 * sizeof(struct _vm_map));
660 map_data = pmap_steal_memory(map_data_size);
661
662 #if 0
663 /*
664 * Limiting worst case: vm_map_kentry_zone needs to map each "available"
665 * physical page (i.e. that beyond the kernel image and page tables)
666 * individually; we guess at most one entry per eight pages in the
667 * real world. This works out to roughly .1 of 1% of physical memory,
668 * or roughly 1900 entries (64K) for a 64M machine with 4K pages.
669 */
670 #endif
671 kentry_count = pmap_free_pages() / 8;
672
673
674 kentry_data_size =
675 round_page(kentry_count * sizeof(struct vm_map_entry));
676 kentry_data = pmap_steal_memory(kentry_data_size);
677 }
678
679 /*
680 * vm_map_create:
681 *
682 * Creates and returns a new empty VM map with
683 * the given physical map structure, and having
684 * the given lower and upper address bounds.
685 */
686 vm_map_t
687 vm_map_create(
688 pmap_t pmap,
689 vm_map_offset_t min,
690 vm_map_offset_t max,
691 boolean_t pageable)
692 {
693 static int color_seed = 0;
694 register vm_map_t result;
695
696 result = (vm_map_t) zalloc(vm_map_zone);
697 if (result == VM_MAP_NULL)
698 panic("vm_map_create");
699
700 vm_map_first_entry(result) = vm_map_to_entry(result);
701 vm_map_last_entry(result) = vm_map_to_entry(result);
702 result->hdr.nentries = 0;
703 result->hdr.entries_pageable = pageable;
704
705 vm_map_store_init( &(result->hdr) );
706
707 result->size = 0;
708 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
709 result->user_wire_size = 0;
710 result->ref_count = 1;
711 #if TASK_SWAPPER
712 result->res_count = 1;
713 result->sw_state = MAP_SW_IN;
714 #endif /* TASK_SWAPPER */
715 result->pmap = pmap;
716 result->min_offset = min;
717 result->max_offset = max;
718 result->wiring_required = FALSE;
719 result->no_zero_fill = FALSE;
720 result->mapped = FALSE;
721 result->wait_for_space = FALSE;
722 result->switch_protect = FALSE;
723 result->disable_vmentry_reuse = FALSE;
724 result->map_disallow_data_exec = FALSE;
725 result->highest_entry_end = 0;
726 result->first_free = vm_map_to_entry(result);
727 result->hint = vm_map_to_entry(result);
728 result->color_rr = (color_seed++) & vm_color_mask;
729 result->jit_entry_exists = FALSE;
730 #if CONFIG_FREEZE
731 result->default_freezer_toc = NULL;
732 #endif
733 vm_map_lock_init(result);
734 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
735
736 return(result);
737 }
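/*
 * A hypothetical creation sketch (simplified; not the actual task setup
 * path): pair a fresh pmap with an empty, pageable 64-bit user map.
 */
#if 0
	pmap_t		new_pmap;
	vm_map_t	new_map;

	new_pmap = pmap_create((vm_map_size_t) 0, TRUE);	/* 64-bit */
	new_map = vm_map_create(new_pmap,
				(vm_map_offset_t) MACH_VM_MIN_ADDRESS,
				(vm_map_offset_t) MACH_VM_MAX_ADDRESS,
				TRUE);		/* entries_pageable */
#endif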
738
739 /*
740 * vm_map_entry_create: [ internal use only ]
741 *
742 * Allocates a VM map entry for insertion in the
743 * given map (or map copy). No fields are filled.
744 */
745 #define vm_map_entry_create(map) \
746 _vm_map_entry_create(&(map)->hdr)
747
748 #define vm_map_copy_entry_create(copy) \
749 _vm_map_entry_create(&(copy)->cpy_hdr)
750
751 static vm_map_entry_t
752 _vm_map_entry_create(
753 register struct vm_map_header *map_header)
754 {
755 register zone_t zone;
756 register vm_map_entry_t entry;
757
758 if (map_header->entries_pageable)
759 zone = vm_map_entry_zone;
760 else
761 zone = vm_map_kentry_zone;
762
763 entry = (vm_map_entry_t) zalloc(zone);
764 if (entry == VM_MAP_ENTRY_NULL)
765 panic("vm_map_entry_create");
766 vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
767
768 return(entry);
769 }
770
771 /*
772 * vm_map_entry_dispose: [ internal use only ]
773 *
774 * Inverse of vm_map_entry_create.
775 *
776 * write map lock held so no need to
777 * do anything special to insure correctness
778 * of the stores
779 */
780 #define vm_map_entry_dispose(map, entry) \
781 vm_map_store_update( map, entry, VM_MAP_ENTRY_DELETE); \
782 _vm_map_entry_dispose(&(map)->hdr, (entry))
783
784 #define vm_map_copy_entry_dispose(copy, entry) \
785 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
786
787 static void
788 _vm_map_entry_dispose(
789 register struct vm_map_header *map_header,
790 register vm_map_entry_t entry)
791 {
792 register zone_t zone;
793
794 if (map_header->entries_pageable)
795 zone = vm_map_entry_zone;
796 else
797 zone = vm_map_kentry_zone;
798
799 zfree(zone, entry);
800 }
801
802 #if MACH_ASSERT
803 static boolean_t first_free_check = FALSE;
804 boolean_t
805 first_free_is_valid(
806 vm_map_t map)
807 {
808 if (!first_free_check)
809 return TRUE;
810
811 return( first_free_is_valid_store( map ));
812 }
813 #endif /* MACH_ASSERT */
814
815
816 #define vm_map_copy_entry_link(copy, after_where, entry) \
817 _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
818
819 #define vm_map_copy_entry_unlink(copy, entry) \
820 _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
821
822 #if MACH_ASSERT && TASK_SWAPPER
823 /*
824 * vm_map_res_reference:
825 *
826 * Adds another valid residence count to the given map.
827 *
828 * Map is locked so this function can be called from
829 * vm_map_swapin.
830 *
831 */
832 void vm_map_res_reference(register vm_map_t map)
833 {
834 /* assert map is locked */
835 assert(map->res_count >= 0);
836 assert(map->ref_count >= map->res_count);
837 if (map->res_count == 0) {
838 lck_mtx_unlock(&map->s_lock);
839 vm_map_lock(map);
840 vm_map_swapin(map);
841 lck_mtx_lock(&map->s_lock);
842 ++map->res_count;
843 vm_map_unlock(map);
844 } else
845 ++map->res_count;
846 }
847
848 /*
849 * vm_map_reference_swap:
850 *
851 * Adds valid reference and residence counts to the given map.
852 *
853 * The map may not be in memory (i.e. zero residence count).
854 *
855 */
856 void vm_map_reference_swap(register vm_map_t map)
857 {
858 assert(map != VM_MAP_NULL);
859 lck_mtx_lock(&map->s_lock);
860 assert(map->res_count >= 0);
861 assert(map->ref_count >= map->res_count);
862 map->ref_count++;
863 vm_map_res_reference(map);
864 lck_mtx_unlock(&map->s_lock);
865 }
866
867 /*
868 * vm_map_res_deallocate:
869 *
870 * Decrement residence count on a map; possibly causing swapout.
871 *
872 * The map must be in memory (i.e. non-zero residence count).
873 *
874 * The map is locked, so this function is callable from vm_map_deallocate.
875 *
876 */
877 void vm_map_res_deallocate(register vm_map_t map)
878 {
879 assert(map->res_count > 0);
880 if (--map->res_count == 0) {
881 lck_mtx_unlock(&map->s_lock);
882 vm_map_lock(map);
883 vm_map_swapout(map);
884 vm_map_unlock(map);
885 lck_mtx_lock(&map->s_lock);
886 }
887 assert(map->ref_count >= map->res_count);
888 }
889 #endif /* MACH_ASSERT && TASK_SWAPPER */
890
891 /*
892 * vm_map_destroy:
893 *
894 * Actually destroy a map.
895 */
896 void
897 vm_map_destroy(
898 vm_map_t map,
899 int flags)
900 {
901 vm_map_lock(map);
902
903 /* clean up regular map entries */
904 (void) vm_map_delete(map, map->min_offset, map->max_offset,
905 flags, VM_MAP_NULL);
906 /* clean up leftover special mappings (commpage, etc...) */
907 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
908 flags, VM_MAP_NULL);
909
910 #if CONFIG_FREEZE
911 if (map->default_freezer_toc){
912 default_freezer_mapping_free( &(map->default_freezer_toc), TRUE);
913 }
914 #endif
915 vm_map_unlock(map);
916
917 assert(map->hdr.nentries == 0);
918
919 if(map->pmap)
920 pmap_destroy(map->pmap);
921
922 zfree(vm_map_zone, map);
923 }
924
925 #if TASK_SWAPPER
926 /*
927 * vm_map_swapin/vm_map_swapout
928 *
929 * Swap a map in and out, either referencing or releasing its resources.
930 * These functions are internal use only; however, they must be exported
931 * because they may be called from macros, which are exported.
932 *
933 * In the case of swapout, there could be races on the residence count,
934 * so if the residence count is up, we return, assuming that a
935 * vm_map_deallocate() call in the near future will bring us back.
936 *
937 * Locking:
938 * -- We use the map write lock for synchronization among races.
939 * -- The map write lock, and not the simple s_lock, protects the
940 * swap state of the map.
941 * -- If a map entry is a share map, then we hold both locks, in
942 * hierarchical order.
943 *
944 * Synchronization Notes:
945 * 1) If a vm_map_swapin() call happens while swapout in progress, it
946 * will block on the map lock and proceed when swapout is through.
947 * 2) A vm_map_reference() call at this time is illegal, and will
948 * cause a panic. vm_map_reference() is only allowed on resident
949 * maps, since it refuses to block.
950 * 3) A vm_map_swapin() call during a swapin will block, and
951 * proceed when the first swapin is done, turning into a nop.
952 * This is the reason the res_count is not incremented until
953 * after the swapin is complete.
954 * 4) There is a timing hole after the checks of the res_count, before
955 * the map lock is taken, during which a swapin may get the lock
956 * before a swapout about to happen. If this happens, the swapin
957 * will detect the state and increment the reference count, causing
958 * the swapout to be a nop, thereby delaying it until a later
959 * vm_map_deallocate. If the swapout gets the lock first, then
960 * the swapin will simply block until the swapout is done, and
961 * then proceed.
962 *
963 * Because vm_map_swapin() is potentially an expensive operation, it
964 * should be used with caution.
965 *
966 * Invariants:
967 * 1) A map with a residence count of zero is either swapped, or
968 * being swapped.
969 * 2) A map with a non-zero residence count is either resident,
970 * or being swapped in.
971 */
972
973 int vm_map_swap_enable = 1;
974
975 void vm_map_swapin (vm_map_t map)
976 {
977 register vm_map_entry_t entry;
978
979 if (!vm_map_swap_enable) /* debug */
980 return;
981
982 /*
983 * Map is locked
984 * First deal with various races.
985 */
986 if (map->sw_state == MAP_SW_IN)
987 /*
988 * we raced with swapout and won. Returning will incr.
989 * the res_count, turning the swapout into a nop.
990 */
991 return;
992
993 /*
994 * The residence count must be zero. If we raced with another
995 * swapin, the state would have been IN; if we raced with a
996 * swapout (after another competing swapin), we must have lost
997 * the race to get here (see above comment), in which case
998 * res_count is still 0.
999 */
1000 assert(map->res_count == 0);
1001
1002 /*
1003 * There are no intermediate states of a map going out or
1004 * coming in, since the map is locked during the transition.
1005 */
1006 assert(map->sw_state == MAP_SW_OUT);
1007
1008 /*
1009 * We now operate upon each map entry. If the entry is a sub-
1010 * or share-map, we call vm_map_res_reference upon it.
1011 * If the entry is an object, we call vm_object_res_reference
1012 * (this may iterate through the shadow chain).
1013 * Note that we hold the map locked the entire time,
1014 * even if we get back here via a recursive call in
1015 * vm_map_res_reference.
1016 */
1017 entry = vm_map_first_entry(map);
1018
1019 while (entry != vm_map_to_entry(map)) {
1020 if (entry->object.vm_object != VM_OBJECT_NULL) {
1021 if (entry->is_sub_map) {
1022 vm_map_t lmap = entry->object.sub_map;
1023 lck_mtx_lock(&lmap->s_lock);
1024 vm_map_res_reference(lmap);
1025 lck_mtx_unlock(&lmap->s_lock);
1026 } else {
1027 vm_object_t object = entry->object.vm_object;
1028 vm_object_lock(object);
1029 /*
1030 * This call may iterate through the
1031 * shadow chain.
1032 */
1033 vm_object_res_reference(object);
1034 vm_object_unlock(object);
1035 }
1036 }
1037 entry = entry->vme_next;
1038 }
1039 assert(map->sw_state == MAP_SW_OUT);
1040 map->sw_state = MAP_SW_IN;
1041 }
1042
1043 void vm_map_swapout(vm_map_t map)
1044 {
1045 register vm_map_entry_t entry;
1046
1047 /*
1048 * Map is locked
1049 * First deal with various races.
1050 * If we raced with a swapin and lost, the residence count
1051 * will have been incremented to 1, and we simply return.
1052 */
1053 lck_mtx_lock(&map->s_lock);
1054 if (map->res_count != 0) {
1055 lck_mtx_unlock(&map->s_lock);
1056 return;
1057 }
1058 lck_mtx_unlock(&map->s_lock);
1059
1060 /*
1061 * There are no intermediate states of a map going out or
1062 * coming in, since the map is locked during the transition.
1063 */
1064 assert(map->sw_state == MAP_SW_IN);
1065
1066 if (!vm_map_swap_enable)
1067 return;
1068
1069 /*
1070 * We now operate upon each map entry. If the entry is a sub-
1071 * or share-map, we call vm_map_res_deallocate upon it.
1072 * If the entry is an object, we call vm_object_res_deallocate
1073 * (this may iterate through the shadow chain).
1074 * Note that we hold the map locked the entire time,
1075 * even if we get back here via a recursive call in
1076 * vm_map_res_deallocate.
1077 */
1078 entry = vm_map_first_entry(map);
1079
1080 while (entry != vm_map_to_entry(map)) {
1081 if (entry->object.vm_object != VM_OBJECT_NULL) {
1082 if (entry->is_sub_map) {
1083 vm_map_t lmap = entry->object.sub_map;
1084 lck_mtx_lock(&lmap->s_lock);
1085 vm_map_res_deallocate(lmap);
1086 lck_mtx_unlock(&lmap->s_lock);
1087 } else {
1088 vm_object_t object = entry->object.vm_object;
1089 vm_object_lock(object);
1090 /*
1091 * This call may take a long time,
1092 * since it could actively push
1093 * out pages (if we implement it
1094 * that way).
1095 */
1096 vm_object_res_deallocate(object);
1097 vm_object_unlock(object);
1098 }
1099 }
1100 entry = entry->vme_next;
1101 }
1102 assert(map->sw_state == MAP_SW_IN);
1103 map->sw_state = MAP_SW_OUT;
1104 }
1105
1106 #endif /* TASK_SWAPPER */
1107
1108 /*
1109 * vm_map_lookup_entry: [ internal use only ]
1110 *
1111 * Calls into the vm map store layer to find the map
1112 * entry containing (or immediately preceding) the
1113 * specified address in the given map; the entry is returned
1114 * in the "entry" parameter. The boolean
1115 * result indicates whether the address is
1116 * actually contained in the map.
1117 */
1118 boolean_t
1119 vm_map_lookup_entry(
1120 register vm_map_t map,
1121 register vm_map_offset_t address,
1122 vm_map_entry_t *entry) /* OUT */
1123 {
1124 return ( vm_map_store_lookup_entry( map, address, entry ));
1125 }
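/*
 * A minimal usage sketch ("map" and "addr" are placeholders; the read
 * lock is taken around the lookup so the returned entry stays valid):
 */
#if 0
	vm_map_entry_t	entry;

	vm_map_lock_read(map);
	if (vm_map_lookup_entry(map, addr, &entry)) {
		/* addr lies within [entry->vme_start, entry->vme_end) */
	} else {
		/* no entry contains addr; "entry" precedes the hole */
	}
	vm_map_unlock_read(map);
#endif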
1126
1127 /*
1128 * Routine: vm_map_find_space
1129 * Purpose:
1130 * Allocate a range in the specified virtual address map,
1131 * returning the entry allocated for that range.
1132 * Used by kmem_alloc, etc.
1133 *
1134 * The map must NOT be locked. It will be returned locked
1135 * on KERN_SUCCESS, unlocked on failure.
1136 *
1137 * If an entry is allocated, the object/offset fields
1138 * are initialized to zero.
1139 */
1140 kern_return_t
1141 vm_map_find_space(
1142 register vm_map_t map,
1143 vm_map_offset_t *address, /* OUT */
1144 vm_map_size_t size,
1145 vm_map_offset_t mask,
1146 int flags,
1147 vm_map_entry_t *o_entry) /* OUT */
1148 {
1149 register vm_map_entry_t entry, new_entry;
1150 register vm_map_offset_t start;
1151 register vm_map_offset_t end;
1152
1153 if (size == 0) {
1154 *address = 0;
1155 return KERN_INVALID_ARGUMENT;
1156 }
1157
1158 if (flags & VM_FLAGS_GUARD_AFTER) {
1159 /* account for the back guard page in the size */
1160 size += PAGE_SIZE_64;
1161 }
1162
1163 new_entry = vm_map_entry_create(map);
1164
1165 /*
1166 * Look for the first possible address; if there's already
1167 * something at this address, we have to start after it.
1168 */
1169
1170 vm_map_lock(map);
1171
1172 if( map->disable_vmentry_reuse == TRUE) {
1173 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1174 } else {
1175 assert(first_free_is_valid(map));
1176 if ((entry = map->first_free) == vm_map_to_entry(map))
1177 start = map->min_offset;
1178 else
1179 start = entry->vme_end;
1180 }
1181
1182 /*
1183 * In any case, the "entry" always precedes
1184 * the proposed new region throughout the loop:
1185 */
1186
1187 while (TRUE) {
1188 register vm_map_entry_t next;
1189
1190 /*
1191 * Find the end of the proposed new region.
1192 * Be sure we didn't go beyond the end, or
1193 * wrap around the address.
1194 */
1195
1196 if (flags & VM_FLAGS_GUARD_BEFORE) {
1197 /* reserve space for the front guard page */
1198 start += PAGE_SIZE_64;
1199 }
1200 end = ((start + mask) & ~mask);
1201
1202 if (end < start) {
1203 vm_map_entry_dispose(map, new_entry);
1204 vm_map_unlock(map);
1205 return(KERN_NO_SPACE);
1206 }
1207 start = end;
1208 end += size;
1209
1210 if ((end > map->max_offset) || (end < start)) {
1211 vm_map_entry_dispose(map, new_entry);
1212 vm_map_unlock(map);
1213 return(KERN_NO_SPACE);
1214 }
1215
1216 /*
1217 * If there are no more entries, we must win.
1218 */
1219
1220 next = entry->vme_next;
1221 if (next == vm_map_to_entry(map))
1222 break;
1223
1224 /*
1225 * If there is another entry, it must be
1226 * after the end of the potential new region.
1227 */
1228
1229 if (next->vme_start >= end)
1230 break;
1231
1232 /*
1233 * Didn't fit -- move to the next entry.
1234 */
1235
1236 entry = next;
1237 start = entry->vme_end;
1238 }
1239
1240 /*
1241 * At this point,
1242 * "start" and "end" should define the endpoints of the
1243 * available new range, and
1244 * "entry" should refer to the region before the new
1245 * range, and
1246 *
1247 * the map should be locked.
1248 */
1249
1250 if (flags & VM_FLAGS_GUARD_BEFORE) {
1251 /* go back for the front guard page */
1252 start -= PAGE_SIZE_64;
1253 }
1254 *address = start;
1255
1256 assert(start < end);
1257 new_entry->vme_start = start;
1258 new_entry->vme_end = end;
1259 assert(page_aligned(new_entry->vme_start));
1260 assert(page_aligned(new_entry->vme_end));
1261
1262 new_entry->is_shared = FALSE;
1263 new_entry->is_sub_map = FALSE;
1264 new_entry->use_pmap = FALSE;
1265 new_entry->object.vm_object = VM_OBJECT_NULL;
1266 new_entry->offset = (vm_object_offset_t) 0;
1267
1268 new_entry->needs_copy = FALSE;
1269
1270 new_entry->inheritance = VM_INHERIT_DEFAULT;
1271 new_entry->protection = VM_PROT_DEFAULT;
1272 new_entry->max_protection = VM_PROT_ALL;
1273 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1274 new_entry->wired_count = 0;
1275 new_entry->user_wired_count = 0;
1276
1277 new_entry->in_transition = FALSE;
1278 new_entry->needs_wakeup = FALSE;
1279 new_entry->no_cache = FALSE;
1280 new_entry->permanent = FALSE;
1281 new_entry->superpage_size = 0;
1282
1283 new_entry->alias = 0;
1284 new_entry->zero_wired_pages = FALSE;
1285
1286 VM_GET_FLAGS_ALIAS(flags, new_entry->alias);
1287
1288 /*
1289 * Insert the new entry into the list
1290 */
1291
1292 vm_map_store_entry_link(map, entry, new_entry);
1293
1294 map->size += size;
1295
1296 /*
1297 * Update the lookup hint
1298 */
1299 SAVE_HINT_MAP_WRITE(map, new_entry);
1300
1301 *o_entry = new_entry;
1302 return(KERN_SUCCESS);
1303 }
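/*
 * A minimal calling sketch (simplified from the kernel allocators;
 * "size" and "object" are supplied by the caller).  The map comes back
 * locked on success and must be unlocked once the entry is filled in.
 */
#if 0
	vm_map_offset_t	map_addr;
	vm_map_entry_t	entry;
	kern_return_t	kr;

	kr = vm_map_find_space(kernel_map, &map_addr, size,
			       (vm_map_offset_t) 0, 0, &entry);
	if (kr == KERN_SUCCESS) {
		entry->object.vm_object = object;
		entry->offset = (vm_object_offset_t) 0;
		vm_map_unlock(kernel_map);
	}
#endif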
1304
1305 int vm_map_pmap_enter_print = FALSE;
1306 int vm_map_pmap_enter_enable = FALSE;
1307
1308 /*
1309 * Routine: vm_map_pmap_enter [internal only]
1310 *
1311 * Description:
1312 * Force pages from the specified object to be entered into
1313 * the pmap at the specified address if they are present.
1314 * As soon as a page is not found in the object, the scan ends.
1315 *
1316 * Returns:
1317 * Nothing.
1318 *
1319 * In/out conditions:
1320 * The source map should not be locked on entry.
1321 */
1322 static void
1323 vm_map_pmap_enter(
1324 vm_map_t map,
1325 register vm_map_offset_t addr,
1326 register vm_map_offset_t end_addr,
1327 register vm_object_t object,
1328 vm_object_offset_t offset,
1329 vm_prot_t protection)
1330 {
1331 int type_of_fault;
1332 kern_return_t kr;
1333
1334 if(map->pmap == 0)
1335 return;
1336
1337 while (addr < end_addr) {
1338 register vm_page_t m;
1339
1340 vm_object_lock(object);
1341
1342 m = vm_page_lookup(object, offset);
1343 /*
1344 * ENCRYPTED SWAP:
1345 * The user should never see encrypted data, so do not
1346 * enter an encrypted page in the page table.
1347 */
1348 if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
1349 m->fictitious ||
1350 (m->unusual && ( m->error || m->restart || m->absent))) {
1351 vm_object_unlock(object);
1352 return;
1353 }
1354
1355 if (vm_map_pmap_enter_print) {
1356 printf("vm_map_pmap_enter:");
1357 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1358 map, (unsigned long long)addr, object, (unsigned long long)offset);
1359 }
1360 type_of_fault = DBG_CACHE_HIT_FAULT;
1361 kr = vm_fault_enter(m, map->pmap, addr, protection, protection,
1362 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
1363 &type_of_fault);
1364
1365 vm_object_unlock(object);
1366
1367 offset += PAGE_SIZE_64;
1368 addr += PAGE_SIZE;
1369 }
1370 }
1371
1372 boolean_t vm_map_pmap_is_empty(
1373 vm_map_t map,
1374 vm_map_offset_t start,
1375 vm_map_offset_t end);
1376 boolean_t vm_map_pmap_is_empty(
1377 vm_map_t map,
1378 vm_map_offset_t start,
1379 vm_map_offset_t end)
1380 {
1381 #ifdef MACHINE_PMAP_IS_EMPTY
1382 return pmap_is_empty(map->pmap, start, end);
1383 #else /* MACHINE_PMAP_IS_EMPTY */
1384 vm_map_offset_t offset;
1385 ppnum_t phys_page;
1386
1387 if (map->pmap == NULL) {
1388 return TRUE;
1389 }
1390
1391 for (offset = start;
1392 offset < end;
1393 offset += PAGE_SIZE) {
1394 phys_page = pmap_find_phys(map->pmap, offset);
1395 if (phys_page) {
1396 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1397 "page %d at 0x%llx\n",
1398 map, (long long)start, (long long)end,
1399 phys_page, (long long)offset);
1400 return FALSE;
1401 }
1402 }
1403 return TRUE;
1404 #endif /* MACHINE_PMAP_IS_EMPTY */
1405 }
1406
1407 /*
1408 * Routine: vm_map_enter
1409 *
1410 * Description:
1411 * Allocate a range in the specified virtual address map.
1412 * The resulting range will refer to memory defined by
1413 * the given memory object and offset into that object.
1414 *
1415 * Arguments are as defined in the vm_map call.
1416 */
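/*
 * A hypothetical caller sketch (cf. the vm_allocate path; simplified):
 * an anonymous, zero-filled mapping placed anywhere in the map.
 */
#if 0
	vm_map_offset_t	map_addr = 0;
	kern_return_t	kr;

	kr = vm_map_enter(map, &map_addr, size, (vm_map_offset_t) 0,
			  VM_FLAGS_ANYWHERE,
			  VM_OBJECT_NULL, (vm_object_offset_t) 0,
			  FALSE,			/* needs_copy */
			  VM_PROT_DEFAULT, VM_PROT_ALL,
			  VM_INHERIT_DEFAULT);
#endif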
1417 int _map_enter_debug = 0;
1418 static unsigned int vm_map_enter_restore_successes = 0;
1419 static unsigned int vm_map_enter_restore_failures = 0;
1420 kern_return_t
1421 vm_map_enter(
1422 vm_map_t map,
1423 vm_map_offset_t *address, /* IN/OUT */
1424 vm_map_size_t size,
1425 vm_map_offset_t mask,
1426 int flags,
1427 vm_object_t object,
1428 vm_object_offset_t offset,
1429 boolean_t needs_copy,
1430 vm_prot_t cur_protection,
1431 vm_prot_t max_protection,
1432 vm_inherit_t inheritance)
1433 {
1434 vm_map_entry_t entry, new_entry;
1435 vm_map_offset_t start, tmp_start, tmp_offset;
1436 vm_map_offset_t end, tmp_end;
1437 vm_map_offset_t tmp2_start, tmp2_end;
1438 vm_map_offset_t step;
1439 kern_return_t result = KERN_SUCCESS;
1440 vm_map_t zap_old_map = VM_MAP_NULL;
1441 vm_map_t zap_new_map = VM_MAP_NULL;
1442 boolean_t map_locked = FALSE;
1443 boolean_t pmap_empty = TRUE;
1444 boolean_t new_mapping_established = FALSE;
1445 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1446 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1447 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
1448 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
1449 boolean_t is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
1450 boolean_t permanent = ((flags & VM_FLAGS_PERMANENT) != 0);
1451 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
1452 char alias;
1453 vm_map_offset_t effective_min_offset, effective_max_offset;
1454 kern_return_t kr;
1455
1456 if (superpage_size) {
1457 switch (superpage_size) {
1458 /*
1459 * Note that the current implementation only supports
1460 * a single size for superpages, SUPERPAGE_SIZE, per
1461 * architecture. As soon as more sizes are to be
1462 * supported, SUPERPAGE_SIZE has to be replaced
1463 * with a lookup of the size based on superpage_size.
1464 */
1465 #ifdef __x86_64__
1466 case SUPERPAGE_SIZE_ANY:
1467 /* handle it like 2 MB and round up to page size */
1468 size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
1469 case SUPERPAGE_SIZE_2MB:
1470 break;
1471 #endif
1472 default:
1473 return KERN_INVALID_ARGUMENT;
1474 }
1475 mask = SUPERPAGE_SIZE-1;
1476 if (size & (SUPERPAGE_SIZE-1))
1477 return KERN_INVALID_ARGUMENT;
1478 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
1479 }
1480
1481
1482 #if CONFIG_EMBEDDED
1483 if (cur_protection & VM_PROT_WRITE){
1484 if ((cur_protection & VM_PROT_EXECUTE) && !(flags & VM_FLAGS_MAP_JIT)){
1485 printf("EMBEDDED: %s curprot cannot be write+execute. turning off execute\n", __PRETTY_FUNCTION__);
1486 cur_protection &= ~VM_PROT_EXECUTE;
1487 }
1488 }
1489 #endif /* CONFIG_EMBEDDED */
1490
1491 if (is_submap) {
1492 if (purgable) {
1493 /* submaps can not be purgeable */
1494 return KERN_INVALID_ARGUMENT;
1495 }
1496 if (object == VM_OBJECT_NULL) {
1497 /* submaps can not be created lazily */
1498 return KERN_INVALID_ARGUMENT;
1499 }
1500 }
1501 if (flags & VM_FLAGS_ALREADY) {
1502 /*
1503 * VM_FLAGS_ALREADY says that it's OK if the same mapping
1504 * is already present. For it to be meaningful, the requested
1505 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
1506 * we shouldn't try and remove what was mapped there first
1507 * (!VM_FLAGS_OVERWRITE).
1508 */
1509 if ((flags & VM_FLAGS_ANYWHERE) ||
1510 (flags & VM_FLAGS_OVERWRITE)) {
1511 return KERN_INVALID_ARGUMENT;
1512 }
1513 }
1514
1515 effective_min_offset = map->min_offset;
1516
1517 if (flags & VM_FLAGS_BEYOND_MAX) {
1518 /*
1519 * Allow an insertion beyond the map's max offset.
1520 */
1521 if (vm_map_is_64bit(map))
1522 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
1523 else
1524 effective_max_offset = 0x00000000FFFFF000ULL;
1525 } else {
1526 effective_max_offset = map->max_offset;
1527 }
1528
1529 if (size == 0 ||
1530 (offset & PAGE_MASK_64) != 0) {
1531 *address = 0;
1532 return KERN_INVALID_ARGUMENT;
1533 }
1534
1535 VM_GET_FLAGS_ALIAS(flags, alias);
1536
1537 #define RETURN(value) { result = value; goto BailOut; }
1538
1539 assert(page_aligned(*address));
1540 assert(page_aligned(size));
1541
1542 /*
1543 * Only zero-fill objects are allowed to be purgable.
1544 * LP64todo - limit purgable objects to 32-bits for now
1545 */
1546 if (purgable &&
1547 (offset != 0 ||
1548 (object != VM_OBJECT_NULL &&
1549 (object->vo_size != size ||
1550 object->purgable == VM_PURGABLE_DENY))
1551 || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
1552 return KERN_INVALID_ARGUMENT;
1553
1554 if (!anywhere && overwrite) {
1555 /*
1556 * Create a temporary VM map to hold the old mappings in the
1557 * affected area while we create the new one.
1558 * This avoids releasing the VM map lock in
1559 * vm_map_entry_delete() and allows atomicity
1560 * when we want to replace some mappings with a new one.
1561 * It also allows us to restore the old VM mappings if the
1562 * new mapping fails.
1563 */
1564 zap_old_map = vm_map_create(PMAP_NULL,
1565 *address,
1566 *address + size,
1567 map->hdr.entries_pageable);
1568 }
1569
1570 StartAgain: ;
1571
1572 start = *address;
1573
1574 if (anywhere) {
1575 vm_map_lock(map);
1576 map_locked = TRUE;
1577
1578 if ((flags & VM_FLAGS_MAP_JIT) && (map->jit_entry_exists)){
1579 result = KERN_INVALID_ARGUMENT;
1580 goto BailOut;
1581 }
1582
1583 /*
1584 * Calculate the first possible address.
1585 */
1586
1587 if (start < effective_min_offset)
1588 start = effective_min_offset;
1589 if (start > effective_max_offset)
1590 RETURN(KERN_NO_SPACE);
1591
1592 /*
1593 * Look for the first possible address;
1594 * if there's already something at this
1595 * address, we have to start after it.
1596 */
1597
1598 if( map->disable_vmentry_reuse == TRUE) {
1599 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1600 } else {
1601 assert(first_free_is_valid(map));
1602
1603 entry = map->first_free;
1604
1605 if (entry == vm_map_to_entry(map)) {
1606 entry = NULL;
1607 } else {
1608 if (entry->vme_next == vm_map_to_entry(map)){
1609 /*
1610 * Hole at the end of the map.
1611 */
1612 entry = NULL;
1613 } else {
1614 if (start < (entry->vme_next)->vme_start ) {
1615 start = entry->vme_end;
1616 } else {
1617 /*
1618 * Need to do a lookup.
1619 */
1620 entry = NULL;
1621 }
1622 }
1623 }
1624
1625 if (entry == NULL) {
1626 vm_map_entry_t tmp_entry;
1627 if (vm_map_lookup_entry(map, start, &tmp_entry))
1628 start = tmp_entry->vme_end;
1629 entry = tmp_entry;
1630 }
1631 }
1632
1633 /*
1634 * In any case, the "entry" always precedes
1635 * the proposed new region throughout the
1636 * loop:
1637 */
1638
1639 while (TRUE) {
1640 register vm_map_entry_t next;
1641
1642 /*
1643 * Find the end of the proposed new region.
1644 * Be sure we didn't go beyond the end, or
1645 * wrap around the address.
1646 */
1647
1648 end = ((start + mask) & ~mask);
1649 if (end < start)
1650 RETURN(KERN_NO_SPACE);
1651 start = end;
1652 end += size;
1653
1654 if ((end > effective_max_offset) || (end < start)) {
1655 if (map->wait_for_space) {
1656 if (size <= (effective_max_offset -
1657 effective_min_offset)) {
1658 assert_wait((event_t)map,
1659 THREAD_ABORTSAFE);
1660 vm_map_unlock(map);
1661 map_locked = FALSE;
1662 thread_block(THREAD_CONTINUE_NULL);
1663 goto StartAgain;
1664 }
1665 }
1666 RETURN(KERN_NO_SPACE);
1667 }
1668
1669 /*
1670 * If there are no more entries, we must win.
1671 */
1672
1673 next = entry->vme_next;
1674 if (next == vm_map_to_entry(map))
1675 break;
1676
1677 /*
1678 * If there is another entry, it must be
1679 * after the end of the potential new region.
1680 */
1681
1682 if (next->vme_start >= end)
1683 break;
1684
1685 /*
1686 * Didn't fit -- move to the next entry.
1687 */
1688
1689 entry = next;
1690 start = entry->vme_end;
1691 }
1692 *address = start;
1693 } else {
1694 /*
1695 * Verify that:
1696 * the address doesn't itself violate
1697 * the mask requirement.
1698 */
1699
1700 vm_map_lock(map);
1701 map_locked = TRUE;
1702 if ((start & mask) != 0)
1703 RETURN(KERN_NO_SPACE);
1704
1705 /*
1706 * ... the address is within bounds
1707 */
1708
1709 end = start + size;
1710
1711 if ((start < effective_min_offset) ||
1712 (end > effective_max_offset) ||
1713 (start >= end)) {
1714 RETURN(KERN_INVALID_ADDRESS);
1715 }
1716
1717 if (overwrite && zap_old_map != VM_MAP_NULL) {
1718 /*
1719 * Fixed mapping and "overwrite" flag: attempt to
1720 * remove all existing mappings in the specified
1721 * address range, saving them in our "zap_old_map".
1722 */
1723 (void) vm_map_delete(map, start, end,
1724 VM_MAP_REMOVE_SAVE_ENTRIES,
1725 zap_old_map);
1726 }
1727
1728 /*
1729 * ... the starting address isn't allocated
1730 */
1731
1732 if (vm_map_lookup_entry(map, start, &entry)) {
1733 if (! (flags & VM_FLAGS_ALREADY)) {
1734 RETURN(KERN_NO_SPACE);
1735 }
1736 /*
1737 * Check if what's already there is what we want.
1738 */
1739 tmp_start = start;
1740 tmp_offset = offset;
1741 if (entry->vme_start < start) {
1742 tmp_start -= start - entry->vme_start;
1743 tmp_offset -= start - entry->vme_start;
1744
1745 }
1746 for (; entry->vme_start < end;
1747 entry = entry->vme_next) {
1748 /*
1749 * Check if the mapping's attributes
1750 * match the existing map entry.
1751 */
1752 if (entry == vm_map_to_entry(map) ||
1753 entry->vme_start != tmp_start ||
1754 entry->is_sub_map != is_submap ||
1755 entry->offset != tmp_offset ||
1756 entry->needs_copy != needs_copy ||
1757 entry->protection != cur_protection ||
1758 entry->max_protection != max_protection ||
1759 entry->inheritance != inheritance ||
1760 entry->alias != alias) {
1761 /* not the same mapping ! */
1762 RETURN(KERN_NO_SPACE);
1763 }
1764 /*
1765 * Check if the same object is being mapped.
1766 */
1767 if (is_submap) {
1768 if (entry->object.sub_map !=
1769 (vm_map_t) object) {
1770 /* not the same submap */
1771 RETURN(KERN_NO_SPACE);
1772 }
1773 } else {
1774 if (entry->object.vm_object != object) {
1775 /* not the same VM object... */
1776 vm_object_t obj2;
1777
1778 obj2 = entry->object.vm_object;
1779 if ((obj2 == VM_OBJECT_NULL ||
1780 obj2->internal) &&
1781 (object == VM_OBJECT_NULL ||
1782 object->internal)) {
1783 /*
1784 * ... but both are
1785 * anonymous memory,
1786 * so equivalent.
1787 */
1788 } else {
1789 RETURN(KERN_NO_SPACE);
1790 }
1791 }
1792 }
1793
1794 tmp_offset += entry->vme_end - entry->vme_start;
1795 tmp_start += entry->vme_end - entry->vme_start;
1796 if (entry->vme_end >= end) {
1797 /* reached the end of our mapping */
1798 break;
1799 }
1800 }
1801 /* it all matches: let's use what's already there ! */
1802 RETURN(KERN_MEMORY_PRESENT);
1803 }
1804
1805 /*
1806 * ... the next region doesn't overlap the
1807 * end point.
1808 */
1809
1810 if ((entry->vme_next != vm_map_to_entry(map)) &&
1811 (entry->vme_next->vme_start < end))
1812 RETURN(KERN_NO_SPACE);
1813 }
1814
1815 /*
1816 * At this point,
1817 * "start" and "end" should define the endpoints of the
1818 * available new range, and
1819 * "entry" should refer to the region before the new
1820 * range, and
1821 *
1822 * the map should be locked.
1823 */
1824
1825 /*
1826 * See whether we can avoid creating a new entry (and object) by
1827 * extending one of our neighbors. [So far, we only attempt to
1828 * extend from below.] Note that we can never extend/join
1829 * purgable objects because they need to remain distinct
1830 * entities in order to implement their "volatile object"
1831 * semantics.
1832 */
1833
1834 if (purgable) {
1835 if (object == VM_OBJECT_NULL) {
1836 object = vm_object_allocate(size);
1837 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
1838 object->purgable = VM_PURGABLE_NONVOLATILE;
1839 offset = (vm_object_offset_t)0;
1840 }
1841 } else if ((is_submap == FALSE) &&
1842 (object == VM_OBJECT_NULL) &&
1843 (entry != vm_map_to_entry(map)) &&
1844 (entry->vme_end == start) &&
1845 (!entry->is_shared) &&
1846 (!entry->is_sub_map) &&
1847 ((alias == VM_MEMORY_REALLOC) || (entry->alias == alias)) &&
1848 (entry->inheritance == inheritance) &&
1849 (entry->protection == cur_protection) &&
1850 (entry->max_protection == max_protection) &&
1851 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
1852 (entry->in_transition == 0) &&
1853 (entry->no_cache == no_cache) &&
1854 ((entry->vme_end - entry->vme_start) + size <=
1855 (alias == VM_MEMORY_REALLOC ?
1856 ANON_CHUNK_SIZE :
1857 NO_COALESCE_LIMIT)) &&
1858 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
1859 if (vm_object_coalesce(entry->object.vm_object,
1860 VM_OBJECT_NULL,
1861 entry->offset,
1862 (vm_object_offset_t) 0,
1863 (vm_map_size_t)(entry->vme_end - entry->vme_start),
1864 (vm_map_size_t)(end - entry->vme_end))) {
1865
1866 /*
1867 * Coalesced the two objects - can extend
1868 * the previous map entry to include the
1869 * new range.
1870 */
1871 map->size += (end - entry->vme_end);
1872 assert(entry->vme_start < end);
1873 entry->vme_end = end;
1874 vm_map_store_update_first_free(map, map->first_free);
1875 RETURN(KERN_SUCCESS);
1876 }
1877 }
1878
1879 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
1880 new_entry = NULL;
1881
1882 for (tmp2_start = start; tmp2_start<end; tmp2_start += step) {
1883 tmp2_end = tmp2_start + step;
1884 /*
1885 * Create a new entry
1886 * LP64todo - for now, we can only allocate 4GB internal objects
1887 * because the default pager can't page bigger ones. Remove this
1888 * when it can.
1889 *
1890 * XXX FBDP
1891 * The reserved "page zero" in each process's address space can
1892 * be arbitrarily large. Splitting it into separate 4GB objects and
1893 * therefore different VM map entries serves no purpose and just
1894 * slows down operations on the VM map, so let's not split the
1895 * allocation into 4GB chunks if the max protection is NONE. That
1896 * memory should never be accessible, so it will never get to the
1897 * default pager.
1898 */
1899 tmp_start = tmp2_start;
1900 if (object == VM_OBJECT_NULL &&
1901 size > (vm_map_size_t)ANON_CHUNK_SIZE &&
1902 max_protection != VM_PROT_NONE &&
1903 superpage_size == 0)
1904 tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE;
1905 else
1906 tmp_end = tmp2_end;
1907 do {
1908 new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
1909 object, offset, needs_copy,
1910 FALSE, FALSE,
1911 cur_protection, max_protection,
1912 VM_BEHAVIOR_DEFAULT,
1913 (flags & VM_FLAGS_MAP_JIT)? VM_INHERIT_NONE: inheritance,
1914 0, no_cache,
1915 permanent, superpage_size);
1916 new_entry->alias = alias;
1917 if (flags & VM_FLAGS_MAP_JIT){
1918 if (!(map->jit_entry_exists)){
1919 new_entry->used_for_jit = TRUE;
1920 map->jit_entry_exists = TRUE;
1921 }
1922 }
1923
1924 if (is_submap) {
1925 vm_map_t submap;
1926 boolean_t submap_is_64bit;
1927 boolean_t use_pmap;
1928
1929 new_entry->is_sub_map = TRUE;
1930 submap = (vm_map_t) object;
1931 submap_is_64bit = vm_map_is_64bit(submap);
1932 use_pmap = (alias == VM_MEMORY_SHARED_PMAP);
1933 #ifndef NO_NESTED_PMAP
1934 if (use_pmap && submap->pmap == NULL) {
1935 /* we need a sub pmap to nest... */
1936 submap->pmap = pmap_create(0, submap_is_64bit);
1937 if (submap->pmap == NULL) {
1938 /* let's proceed without nesting... */
1939 }
1940 }
1941 if (use_pmap && submap->pmap != NULL) {
1942 kr = pmap_nest(map->pmap,
1943 submap->pmap,
1944 tmp_start,
1945 tmp_start,
1946 tmp_end - tmp_start);
1947 if (kr != KERN_SUCCESS) {
1948 printf("vm_map_enter: "
1949 "pmap_nest(0x%llx,0x%llx) "
1950 "error 0x%x\n",
1951 (long long)tmp_start,
1952 (long long)tmp_end,
1953 kr);
1954 } else {
1955 /* we're now nested! */
1956 new_entry->use_pmap = TRUE;
1957 pmap_empty = FALSE;
1958 }
1959 }
1960 #endif /* NO_NESTED_PMAP */
1961 }
1962 entry = new_entry;
1963
1964 if (superpage_size) {
1965 vm_page_t pages, m;
1966 vm_object_t sp_object;
1967
1968 entry->offset = 0;
1969
1970 /* allocate one superpage */
1971 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
1972 if (kr != KERN_SUCCESS) {
1973 new_mapping_established = TRUE; /* will cause deallocation of whole range */
1974 RETURN(kr);
1975 }
1976
1977 /* create one vm_object per superpage */
1978 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
1979 sp_object->phys_contiguous = TRUE;
1980 sp_object->vo_shadow_offset = (vm_object_offset_t)pages->phys_page*PAGE_SIZE;
1981 entry->object.vm_object = sp_object;
1982
1983 /* enter the base pages into the object */
1984 vm_object_lock(sp_object);
1985 for (offset = 0; offset < SUPERPAGE_SIZE; offset += PAGE_SIZE) {
1986 m = pages;
1987 pmap_zero_page(m->phys_page);
1988 pages = NEXT_PAGE(m);
1989 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
1990 vm_page_insert(m, sp_object, offset);
1991 }
1992 vm_object_unlock(sp_object);
1993 }
1994 } while (tmp_end != tmp2_end &&
1995 (tmp_start = tmp_end) &&
1996 (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ?
1997 tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end));
1998 }
1999
2000 vm_map_unlock(map);
2001 map_locked = FALSE;
2002
2003 new_mapping_established = TRUE;
2004
2005 /* Wire down the new entry if the user
2006 * requested all new map entries be wired.
2007 */
2008 if ((map->wiring_required)||(superpage_size)) {
2009 pmap_empty = FALSE; /* pmap won't be empty */
2010 result = vm_map_wire(map, start, end,
2011 new_entry->protection, TRUE);
2012 RETURN(result);
2013 }
2014
2015 if ((object != VM_OBJECT_NULL) &&
2016 (vm_map_pmap_enter_enable) &&
2017 (!anywhere) &&
2018 (!needs_copy) &&
2019 (size < (128*1024))) {
2020 pmap_empty = FALSE; /* pmap won't be empty */
2021
2022 if (override_nx(map, alias) && cur_protection)
2023 cur_protection |= VM_PROT_EXECUTE;
2024
2025 vm_map_pmap_enter(map, start, end,
2026 object, offset, cur_protection);
2027 }
2028
2029 BailOut: ;
2030 if (result == KERN_SUCCESS) {
2031 vm_prot_t pager_prot;
2032 memory_object_t pager;
2033
2034 if (pmap_empty &&
2035 !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
2036 assert(vm_map_pmap_is_empty(map,
2037 *address,
2038 *address+size));
2039 }
2040
2041 /*
2042 * For "named" VM objects, let the pager know that the
2043 * memory object is being mapped. Some pagers need to keep
2044 * track of this, to know when they can reclaim the memory
2045 * object, for example.
2046 * VM calls memory_object_map() for each mapping (specifying
2047 * the protection of each mapping) and calls
2048 * memory_object_last_unmap() when all the mappings are gone.
2049 */
2050 pager_prot = max_protection;
2051 if (needs_copy) {
2052 /*
2053 * Copy-On-Write mapping: won't modify
2054 * the memory object.
2055 */
2056 pager_prot &= ~VM_PROT_WRITE;
2057 }
2058 if (!is_submap &&
2059 object != VM_OBJECT_NULL &&
2060 object->named &&
2061 object->pager != MEMORY_OBJECT_NULL) {
2062 vm_object_lock(object);
2063 pager = object->pager;
2064 if (object->named &&
2065 pager != MEMORY_OBJECT_NULL) {
2066 assert(object->pager_ready);
2067 vm_object_mapping_wait(object, THREAD_UNINT);
2068 vm_object_mapping_begin(object);
2069 vm_object_unlock(object);
2070
2071 kr = memory_object_map(pager, pager_prot);
2072 assert(kr == KERN_SUCCESS);
2073
2074 vm_object_lock(object);
2075 vm_object_mapping_end(object);
2076 }
2077 vm_object_unlock(object);
2078 }
2079 } else {
2080 if (new_mapping_established) {
2081 /*
2082 * We have to get rid of the new mappings since we
2083 * won't make them available to the user.
2084 * Try to do that atomically, to minimize the risk
2085 * that someone else creates new mappings in that range.
2086 */
2087 zap_new_map = vm_map_create(PMAP_NULL,
2088 *address,
2089 *address + size,
2090 map->hdr.entries_pageable);
2091 if (!map_locked) {
2092 vm_map_lock(map);
2093 map_locked = TRUE;
2094 }
2095 (void) vm_map_delete(map, *address, *address+size,
2096 VM_MAP_REMOVE_SAVE_ENTRIES,
2097 zap_new_map);
2098 }
2099 if (zap_old_map != VM_MAP_NULL &&
2100 zap_old_map->hdr.nentries != 0) {
2101 vm_map_entry_t entry1, entry2;
2102
2103 /*
2104 * The new mapping failed. Attempt to restore
2105 * the old mappings, saved in the "zap_old_map".
2106 */
2107 if (!map_locked) {
2108 vm_map_lock(map);
2109 map_locked = TRUE;
2110 }
2111
2112 /* first check if the coast is still clear */
2113 start = vm_map_first_entry(zap_old_map)->vme_start;
2114 end = vm_map_last_entry(zap_old_map)->vme_end;
2115 if (vm_map_lookup_entry(map, start, &entry1) ||
2116 vm_map_lookup_entry(map, end, &entry2) ||
2117 entry1 != entry2) {
2118 /*
2119 * Part of that range has already been
2120 * re-mapped: we can't restore the old
2121 * mappings...
2122 */
2123 vm_map_enter_restore_failures++;
2124 } else {
2125 /*
2126 * Transfer the saved map entries from
2127 * "zap_old_map" to the original "map",
2128 * inserting them all after "entry1".
2129 */
2130 for (entry2 = vm_map_first_entry(zap_old_map);
2131 entry2 != vm_map_to_entry(zap_old_map);
2132 entry2 = vm_map_first_entry(zap_old_map)) {
2133 vm_map_size_t entry_size;
2134
2135 entry_size = (entry2->vme_end -
2136 entry2->vme_start);
2137 vm_map_store_entry_unlink(zap_old_map,
2138 entry2);
2139 zap_old_map->size -= entry_size;
2140 vm_map_store_entry_link(map, entry1, entry2);
2141 map->size += entry_size;
2142 entry1 = entry2;
2143 }
2144 if (map->wiring_required) {
2145 /*
2146 * XXX TODO: we should rewire the
2147 * old pages here...
2148 */
2149 }
2150 vm_map_enter_restore_successes++;
2151 }
2152 }
2153 }
2154
2155 if (map_locked) {
2156 vm_map_unlock(map);
2157 }
2158
2159 /*
2160 * Get rid of the "zap_maps" and all the map entries that
2161 * they may still contain.
2162 */
2163 if (zap_old_map != VM_MAP_NULL) {
2164 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2165 zap_old_map = VM_MAP_NULL;
2166 }
2167 if (zap_new_map != VM_MAP_NULL) {
2168 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2169 zap_new_map = VM_MAP_NULL;
2170 }
2171
2172 return result;
2173
2174 #undef RETURN
2175 }
2176
2177 kern_return_t
2178 vm_map_enter_mem_object(
2179 vm_map_t target_map,
2180 vm_map_offset_t *address,
2181 vm_map_size_t initial_size,
2182 vm_map_offset_t mask,
2183 int flags,
2184 ipc_port_t port,
2185 vm_object_offset_t offset,
2186 boolean_t copy,
2187 vm_prot_t cur_protection,
2188 vm_prot_t max_protection,
2189 vm_inherit_t inheritance)
2190 {
2191 vm_map_address_t map_addr;
2192 vm_map_size_t map_size;
2193 vm_object_t object;
2194 vm_object_size_t size;
2195 kern_return_t result;
2196 boolean_t mask_cur_protection, mask_max_protection;
2197
2198 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
2199 mask_max_protection = max_protection & VM_PROT_IS_MASK;
2200 cur_protection &= ~VM_PROT_IS_MASK;
2201 max_protection &= ~VM_PROT_IS_MASK;
2202
2203 /*
2204 * Check arguments for validity
2205 */
2206 if ((target_map == VM_MAP_NULL) ||
2207 (cur_protection & ~VM_PROT_ALL) ||
2208 (max_protection & ~VM_PROT_ALL) ||
2209 (inheritance > VM_INHERIT_LAST_VALID) ||
2210 initial_size == 0)
2211 return KERN_INVALID_ARGUMENT;
2212
2213 map_addr = vm_map_trunc_page(*address);
2214 map_size = vm_map_round_page(initial_size);
2215 size = vm_object_round_page(initial_size);
2216
2217 /*
2218 * Find the vm object (if any) corresponding to this port.
2219 */
2220 if (!IP_VALID(port)) {
2221 object = VM_OBJECT_NULL;
2222 offset = 0;
2223 copy = FALSE;
2224 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
2225 vm_named_entry_t named_entry;
2226
2227 named_entry = (vm_named_entry_t) port->ip_kobject;
2228 /* a few checks to make sure user is obeying rules */
2229 if (size == 0) {
2230 if (offset >= named_entry->size)
2231 return KERN_INVALID_RIGHT;
2232 size = named_entry->size - offset;
2233 }
2234 if (mask_max_protection) {
2235 max_protection &= named_entry->protection;
2236 }
2237 if (mask_cur_protection) {
2238 cur_protection &= named_entry->protection;
2239 }
2240 if ((named_entry->protection & max_protection) !=
2241 max_protection)
2242 return KERN_INVALID_RIGHT;
2243 if ((named_entry->protection & cur_protection) !=
2244 cur_protection)
2245 return KERN_INVALID_RIGHT;
2246 if (named_entry->size < (offset + size))
2247 return KERN_INVALID_ARGUMENT;
2248
2249 /* the caller's "offset" parameter is relative to the start of the */
2250 /* named entry; convert it to an offset into the backing object */
2251 offset = offset + named_entry->offset;
2252
2253 named_entry_lock(named_entry);
2254 if (named_entry->is_sub_map) {
2255 vm_map_t submap;
2256
2257 submap = named_entry->backing.map;
2258 vm_map_lock(submap);
2259 vm_map_reference(submap);
2260 vm_map_unlock(submap);
2261 named_entry_unlock(named_entry);
2262
2263 result = vm_map_enter(target_map,
2264 &map_addr,
2265 map_size,
2266 mask,
2267 flags | VM_FLAGS_SUBMAP,
2268 (vm_object_t) submap,
2269 offset,
2270 copy,
2271 cur_protection,
2272 max_protection,
2273 inheritance);
2274 if (result != KERN_SUCCESS) {
2275 vm_map_deallocate(submap);
2276 } else {
2277 /*
2278 * No need to lock "submap" just to check its
2279 * "mapped" flag: that flag is never reset
2280 * once it's been set and if we race, we'll
2281 * just end up setting it twice, which is OK.
2282 */
2283 if (submap->mapped == FALSE) {
2284 /*
2285 * This submap has never been mapped.
2286 * Set its "mapped" flag now that it
2287 * has been mapped.
2288 * This happens only for the first ever
2289 * mapping of a "submap".
2290 */
2291 vm_map_lock(submap);
2292 submap->mapped = TRUE;
2293 vm_map_unlock(submap);
2294 }
2295 *address = map_addr;
2296 }
2297 return result;
2298
2299 } else if (named_entry->is_pager) {
2300 unsigned int access;
2301 vm_prot_t protections;
2302 unsigned int wimg_mode;
2303
2304 protections = named_entry->protection & VM_PROT_ALL;
2305 access = GET_MAP_MEM(named_entry->protection);
2306
2307 object = vm_object_enter(named_entry->backing.pager,
2308 named_entry->size,
2309 named_entry->internal,
2310 FALSE,
2311 FALSE);
2312 if (object == VM_OBJECT_NULL) {
2313 named_entry_unlock(named_entry);
2314 return KERN_INVALID_OBJECT;
2315 }
2316
2317 /* JMM - drop reference on pager here */
2318
2319 /* create an extra ref for the named entry */
2320 vm_object_lock(object);
2321 vm_object_reference_locked(object);
2322 named_entry->backing.object = object;
2323 named_entry->is_pager = FALSE;
2324 named_entry_unlock(named_entry);
2325
2326 wimg_mode = object->wimg_bits;
2327
2328 if (access == MAP_MEM_IO) {
2329 wimg_mode = VM_WIMG_IO;
2330 } else if (access == MAP_MEM_COPYBACK) {
2331 wimg_mode = VM_WIMG_USE_DEFAULT;
2332 } else if (access == MAP_MEM_WTHRU) {
2333 wimg_mode = VM_WIMG_WTHRU;
2334 } else if (access == MAP_MEM_WCOMB) {
2335 wimg_mode = VM_WIMG_WCOMB;
2336 }
2337
2338 /* wait for object (if any) to be ready */
2339 if (!named_entry->internal) {
2340 while (!object->pager_ready) {
2341 vm_object_wait(
2342 object,
2343 VM_OBJECT_EVENT_PAGER_READY,
2344 THREAD_UNINT);
2345 vm_object_lock(object);
2346 }
2347 }
2348
2349 if (object->wimg_bits != wimg_mode)
2350 vm_object_change_wimg_mode(object, wimg_mode);
2351
2352 object->true_share = TRUE;
2353
2354 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
2355 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
2356 vm_object_unlock(object);
2357 } else {
2358 /* This is the case where we are going to map */
2359 /* an already mapped object. If the object is */
2360 /* not ready it is internal. An external */
2361 /* object cannot be mapped until it is ready, */
2362 /* so we can avoid the ready check */
2363 /* in this case. */
2364 object = named_entry->backing.object;
2365 assert(object != VM_OBJECT_NULL);
2366 named_entry_unlock(named_entry);
2367 vm_object_reference(object);
2368 }
2369 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
2370 /*
2371 * JMM - This is temporary until we unify named entries
2372 * and raw memory objects.
2373 *
2374 * Detected fake ip_kotype for a memory object. In
2375 * this case, the port isn't really a port at all, but
2376 * instead is just a raw memory object.
2377 */
2378
2379 object = vm_object_enter((memory_object_t)port,
2380 size, FALSE, FALSE, FALSE);
2381 if (object == VM_OBJECT_NULL)
2382 return KERN_INVALID_OBJECT;
2383
2384 /* wait for object (if any) to be ready */
2385 if (object != VM_OBJECT_NULL) {
2386 if (object == kernel_object) {
2387 printf("Warning: Attempt to map kernel object"
2388 " by a non-private kernel entity\n");
2389 return KERN_INVALID_OBJECT;
2390 }
2391 if (!object->pager_ready) {
2392 vm_object_lock(object);
2393
2394 while (!object->pager_ready) {
2395 vm_object_wait(object,
2396 VM_OBJECT_EVENT_PAGER_READY,
2397 THREAD_UNINT);
2398 vm_object_lock(object);
2399 }
2400 vm_object_unlock(object);
2401 }
2402 }
2403 } else {
2404 return KERN_INVALID_OBJECT;
2405 }
2406
2407 if (object != VM_OBJECT_NULL &&
2408 object->named &&
2409 object->pager != MEMORY_OBJECT_NULL &&
2410 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2411 memory_object_t pager;
2412 vm_prot_t pager_prot;
2413 kern_return_t kr;
2414
2415 /*
2416 * For "named" VM objects, let the pager know that the
2417 * memory object is being mapped. Some pagers need to keep
2418 * track of this, to know when they can reclaim the memory
2419 * object, for example.
2420 * VM calls memory_object_map() for each mapping (specifying
2421 * the protection of each mapping) and calls
2422 * memory_object_last_unmap() when all the mappings are gone.
2423 */
2424 pager_prot = max_protection;
2425 if (copy) {
2426 /*
2427 * Copy-On-Write mapping: won't modify the
2428 * memory object.
2429 */
2430 pager_prot &= ~VM_PROT_WRITE;
2431 }
2432 vm_object_lock(object);
2433 pager = object->pager;
2434 if (object->named &&
2435 pager != MEMORY_OBJECT_NULL &&
2436 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2437 assert(object->pager_ready);
2438 vm_object_mapping_wait(object, THREAD_UNINT);
2439 vm_object_mapping_begin(object);
2440 vm_object_unlock(object);
2441
2442 kr = memory_object_map(pager, pager_prot);
2443 assert(kr == KERN_SUCCESS);
2444
2445 vm_object_lock(object);
2446 vm_object_mapping_end(object);
2447 }
2448 vm_object_unlock(object);
2449 }
2450
2451 /*
2452 * Perform the copy if requested
2453 */
2454
2455 if (copy) {
2456 vm_object_t new_object;
2457 vm_object_offset_t new_offset;
2458
2459 result = vm_object_copy_strategically(object, offset, size,
2460 &new_object, &new_offset,
2461 &copy);
2462
2463
2464 if (result == KERN_MEMORY_RESTART_COPY) {
2465 boolean_t success;
2466 boolean_t src_needs_copy;
2467
2468 /*
2469 * XXX
2470 * We currently ignore src_needs_copy.
2471 * This really is the issue of how to make
2472 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
2473 * non-kernel users to use. Solution forthcoming.
2474 * In the meantime, since we don't allow non-kernel
2475 * memory managers to specify symmetric copy,
2476 * we won't run into problems here.
2477 */
2478 new_object = object;
2479 new_offset = offset;
2480 success = vm_object_copy_quickly(&new_object,
2481 new_offset, size,
2482 &src_needs_copy,
2483 &copy);
2484 assert(success);
2485 result = KERN_SUCCESS;
2486 }
2487 /*
2488 * Throw away the reference to the
2489 * original object, as it won't be mapped.
2490 */
2491
2492 vm_object_deallocate(object);
2493
2494 if (result != KERN_SUCCESS)
2495 return result;
2496
2497 object = new_object;
2498 offset = new_offset;
2499 }
2500
2501 result = vm_map_enter(target_map,
2502 &map_addr, map_size,
2503 (vm_map_offset_t)mask,
2504 flags,
2505 object, offset,
2506 copy,
2507 cur_protection, max_protection, inheritance);
2508 if (result != KERN_SUCCESS)
2509 vm_object_deallocate(object);
2510 *address = map_addr;
2511 return result;
2512 }
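/*
 * Illustrative sketch (not from the original source): mapping anonymous,
 * zero-filled memory via vm_map_enter_mem_object() by passing an invalid
 * port.  As the !IP_VALID(port) branch above shows, this degenerates to
 * VM_OBJECT_NULL / offset 0 / copy FALSE.  "target" is assumed to be a
 * valid, referenced vm_map_t; the example is kept out of the build.
 */
#if 0	/* example only */
static kern_return_t
example_enter_anonymous(vm_map_t target, vm_map_size_t size)
{
	vm_map_offset_t	addr = 0;

	return vm_map_enter_mem_object(target,
				       &addr,
				       size,
				       (vm_map_offset_t)0,	/* mask: no alignment constraint */
				       VM_FLAGS_ANYWHERE,
				       IP_NULL,			/* no backing port */
				       (vm_object_offset_t)0,
				       FALSE,			/* copy */
				       VM_PROT_DEFAULT,
				       VM_PROT_ALL,
				       VM_INHERIT_DEFAULT);
}
#endif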
2513
2514
2515
2516
2517 kern_return_t
2518 vm_map_enter_mem_object_control(
2519 vm_map_t target_map,
2520 vm_map_offset_t *address,
2521 vm_map_size_t initial_size,
2522 vm_map_offset_t mask,
2523 int flags,
2524 memory_object_control_t control,
2525 vm_object_offset_t offset,
2526 boolean_t copy,
2527 vm_prot_t cur_protection,
2528 vm_prot_t max_protection,
2529 vm_inherit_t inheritance)
2530 {
2531 vm_map_address_t map_addr;
2532 vm_map_size_t map_size;
2533 vm_object_t object;
2534 vm_object_size_t size;
2535 kern_return_t result;
2536 memory_object_t pager;
2537 vm_prot_t pager_prot;
2538 kern_return_t kr;
2539
2540 /*
2541 * Check arguments for validity
2542 */
2543 if ((target_map == VM_MAP_NULL) ||
2544 (cur_protection & ~VM_PROT_ALL) ||
2545 (max_protection & ~VM_PROT_ALL) ||
2546 (inheritance > VM_INHERIT_LAST_VALID) ||
2547 initial_size == 0)
2548 return KERN_INVALID_ARGUMENT;
2549
2550 map_addr = vm_map_trunc_page(*address);
2551 map_size = vm_map_round_page(initial_size);
2552 size = vm_object_round_page(initial_size);
2553
2554 object = memory_object_control_to_vm_object(control);
2555
2556 if (object == VM_OBJECT_NULL)
2557 return KERN_INVALID_OBJECT;
2558
2559 if (object == kernel_object) {
2560 printf("Warning: Attempt to map kernel object"
2561 " by a non-private kernel entity\n");
2562 return KERN_INVALID_OBJECT;
2563 }
2564
2565 vm_object_lock(object);
2566 object->ref_count++;
2567 vm_object_res_reference(object);
2568
2569 /*
2570 * For "named" VM objects, let the pager know that the
2571 * memory object is being mapped. Some pagers need to keep
2572 * track of this, to know when they can reclaim the memory
2573 * object, for example.
2574 * VM calls memory_object_map() for each mapping (specifying
2575 * the protection of each mapping) and calls
2576 * memory_object_last_unmap() when all the mappings are gone.
2577 */
2578 pager_prot = max_protection;
2579 if (copy) {
2580 pager_prot &= ~VM_PROT_WRITE;
2581 }
2582 pager = object->pager;
2583 if (object->named &&
2584 pager != MEMORY_OBJECT_NULL &&
2585 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2586 assert(object->pager_ready);
2587 vm_object_mapping_wait(object, THREAD_UNINT);
2588 vm_object_mapping_begin(object);
2589 vm_object_unlock(object);
2590
2591 kr = memory_object_map(pager, pager_prot);
2592 assert(kr == KERN_SUCCESS);
2593
2594 vm_object_lock(object);
2595 vm_object_mapping_end(object);
2596 }
2597 vm_object_unlock(object);
2598
2599 /*
2600 * Perform the copy if requested
2601 */
2602
2603 if (copy) {
2604 vm_object_t new_object;
2605 vm_object_offset_t new_offset;
2606
2607 result = vm_object_copy_strategically(object, offset, size,
2608 &new_object, &new_offset,
2609 &copy);
2610
2611
2612 if (result == KERN_MEMORY_RESTART_COPY) {
2613 boolean_t success;
2614 boolean_t src_needs_copy;
2615
2616 /*
2617 * XXX
2618 * We currently ignore src_needs_copy.
2619 * This really is the issue of how to make
2620 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
2621 * non-kernel users to use. Solution forthcoming.
2622 * In the meantime, since we don't allow non-kernel
2623 * memory managers to specify symmetric copy,
2624 * we won't run into problems here.
2625 */
2626 new_object = object;
2627 new_offset = offset;
2628 success = vm_object_copy_quickly(&new_object,
2629 new_offset, size,
2630 &src_needs_copy,
2631 &copy);
2632 assert(success);
2633 result = KERN_SUCCESS;
2634 }
2635 /*
2636 * Throw away the reference to the
2637 * original object, as it won't be mapped.
2638 */
2639
2640 vm_object_deallocate(object);
2641
2642 if (result != KERN_SUCCESS)
2643 return result;
2644
2645 object = new_object;
2646 offset = new_offset;
2647 }
2648
2649 result = vm_map_enter(target_map,
2650 &map_addr, map_size,
2651 (vm_map_offset_t)mask,
2652 flags,
2653 object, offset,
2654 copy,
2655 cur_protection, max_protection, inheritance);
2656 if (result != KERN_SUCCESS)
2657 vm_object_deallocate(object);
2658 *address = map_addr;
2659
2660 return result;
2661 }
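/*
 * Note: this variant differs from vm_map_enter_mem_object() only in how
 * the backing VM object is obtained: the caller hands in a
 * memory_object_control_t directly, so there is no port lookup and no
 * named-entry / submap handling; the pager notification and optional
 * copy steps above mirror the port-based path.
 */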
2662
2663
2664 #if VM_CPM
2665
2666 #ifdef MACH_ASSERT
2667 extern pmap_paddr_t avail_start, avail_end;
2668 #endif
2669
2670 /*
2671 * Allocate memory in the specified map, with the caveat that
2672 * the memory is physically contiguous. This call may fail
2673 * if the system can't find sufficient contiguous memory.
2674 * This call may cause or lead to heart-stopping amounts of
2675 * paging activity.
2676 *
2677 * Memory obtained from this call should be freed in the
2678 * normal way, viz., via vm_deallocate.
2679 */
2680 kern_return_t
2681 vm_map_enter_cpm(
2682 vm_map_t map,
2683 vm_map_offset_t *addr,
2684 vm_map_size_t size,
2685 int flags)
2686 {
2687 vm_object_t cpm_obj;
2688 pmap_t pmap;
2689 vm_page_t m, pages;
2690 kern_return_t kr;
2691 vm_map_offset_t va, start, end, offset;
2692 #if MACH_ASSERT
2693 vm_map_offset_t prev_addr;
2694 #endif /* MACH_ASSERT */
2695
2696 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
2697
2698 if (!vm_allocate_cpm_enabled)
2699 return KERN_FAILURE;
2700
2701 if (size == 0) {
2702 *addr = 0;
2703 return KERN_SUCCESS;
2704 }
2705 if (anywhere)
2706 *addr = vm_map_min(map);
2707 else
2708 *addr = vm_map_trunc_page(*addr);
2709 size = vm_map_round_page(size);
2710
2711 /*
2712 * LP64todo - cpm_allocate should probably allow
2713 * allocations of >4GB, but not with the current
2714 * algorithm, so just cast down the size for now.
2715 */
2716 if (size > VM_MAX_ADDRESS)
2717 return KERN_RESOURCE_SHORTAGE;
2718 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
2719 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)
2720 return kr;
2721
2722 cpm_obj = vm_object_allocate((vm_object_size_t)size);
2723 assert(cpm_obj != VM_OBJECT_NULL);
2724 assert(cpm_obj->internal);
2725 assert(cpm_obj->size == (vm_object_size_t)size);
2726 assert(cpm_obj->can_persist == FALSE);
2727 assert(cpm_obj->pager_created == FALSE);
2728 assert(cpm_obj->pageout == FALSE);
2729 assert(cpm_obj->shadow == VM_OBJECT_NULL);
2730
2731 /*
2732 * Insert pages into object.
2733 */
2734
2735 vm_object_lock(cpm_obj);
2736 for (offset = 0; offset < size; offset += PAGE_SIZE) {
2737 m = pages;
2738 pages = NEXT_PAGE(m);
2739 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2740
2741 assert(!m->gobbled);
2742 assert(!m->wanted);
2743 assert(!m->pageout);
2744 assert(!m->tabled);
2745 assert(VM_PAGE_WIRED(m));
2746 /*
2747 * ENCRYPTED SWAP:
2748 * "m" is not supposed to be pageable, so it
2749 * should not be encrypted. It wouldn't be safe
2750 * to enter it in a new VM object while encrypted.
2751 */
2752 ASSERT_PAGE_DECRYPTED(m);
2753 assert(m->busy);
2754 assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT));
2755
2756 m->busy = FALSE;
2757 vm_page_insert(m, cpm_obj, offset);
2758 }
2759 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
2760 vm_object_unlock(cpm_obj);
2761
2762 /*
2763 * Hang onto a reference on the object in case a
2764 * multi-threaded application for some reason decides
2765 * to deallocate the portion of the address space into
2766 * which we will insert this object.
2767 *
2768 * Unfortunately, we must insert the object now before
2769 * we can talk to the pmap module about which addresses
2770 * must be wired down. Hence, the race with a multi-
2771 * threaded app.
2772 */
2773 vm_object_reference(cpm_obj);
2774
2775 /*
2776 * Insert object into map.
2777 */
2778
2779 kr = vm_map_enter(
2780 map,
2781 addr,
2782 size,
2783 (vm_map_offset_t)0,
2784 flags,
2785 cpm_obj,
2786 (vm_object_offset_t)0,
2787 FALSE,
2788 VM_PROT_ALL,
2789 VM_PROT_ALL,
2790 VM_INHERIT_DEFAULT);
2791
2792 if (kr != KERN_SUCCESS) {
2793 /*
2794 * A CPM object doesn't have can_persist set,
2795 * so all we have to do is deallocate it to
2796 * free up these pages.
2797 */
2798 assert(cpm_obj->pager_created == FALSE);
2799 assert(cpm_obj->can_persist == FALSE);
2800 assert(cpm_obj->pageout == FALSE);
2801 assert(cpm_obj->shadow == VM_OBJECT_NULL);
2802 vm_object_deallocate(cpm_obj); /* kill acquired ref */
2803 vm_object_deallocate(cpm_obj); /* kill creation ref */
2804 }
2805
2806 /*
2807 * Inform the physical mapping system that the
2808 * range of addresses may not fault, so that
2809 * page tables and such can be locked down as well.
2810 */
2811 start = *addr;
2812 end = start + size;
2813 pmap = vm_map_pmap(map);
2814 pmap_pageable(pmap, start, end, FALSE);
2815
2816 /*
2817 * Enter each page into the pmap, to avoid faults.
2818 * Note that this loop could be coded more efficiently,
2819 * if the need arose, rather than looking up each page
2820 * again.
2821 */
2822 for (offset = 0, va = start; offset < size;
2823 va += PAGE_SIZE, offset += PAGE_SIZE) {
2824 int type_of_fault;
2825
2826 vm_object_lock(cpm_obj);
2827 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2828 assert(m != VM_PAGE_NULL);
2829
2830 vm_page_zero_fill(m);
2831
2832 type_of_fault = DBG_ZERO_FILL_FAULT;
2833
2834 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
2835 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
2836 &type_of_fault);
2837
2838 vm_object_unlock(cpm_obj);
2839 }
2840
2841 #if MACH_ASSERT
2842 /*
2843 * Verify ordering in address space.
2844 */
2845 for (offset = 0; offset < size; offset += PAGE_SIZE) {
2846 vm_object_lock(cpm_obj);
2847 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2848 vm_object_unlock(cpm_obj);
2849 if (m == VM_PAGE_NULL)
2850 panic("vm_allocate_cpm: obj 0x%x off 0x%x no page",
2851 cpm_obj, offset);
2852 assert(m->tabled);
2853 assert(!m->busy);
2854 assert(!m->wanted);
2855 assert(!m->fictitious);
2856 assert(!m->private);
2857 assert(!m->absent);
2858 assert(!m->error);
2859 assert(!m->cleaning);
2860 assert(!m->precious);
2861 assert(!m->clustered);
2862 if (offset != 0) {
2863 if (m->phys_page != prev_addr + 1) {
2864 printf("start 0x%x end 0x%x va 0x%x\n",
2865 start, end, va);
2866 printf("obj 0x%x off 0x%x\n", cpm_obj, offset);
2867 printf("m 0x%x prev_address 0x%x\n", m,
2868 prev_addr);
2869 panic("vm_allocate_cpm: pages not contig!");
2870 }
2871 }
2872 prev_addr = m->phys_page;
2873 }
2874 #endif /* MACH_ASSERT */
2875
2876 vm_object_deallocate(cpm_obj); /* kill extra ref */
2877
2878 return kr;
2879 }
2880
2881
2882 #else /* VM_CPM */
2883
2884 /*
2885 * Interface is defined in all cases, but unless the kernel
2886 * is built explicitly for this option, the interface does
2887 * nothing.
2888 */
2889
2890 kern_return_t
2891 vm_map_enter_cpm(
2892 __unused vm_map_t map,
2893 __unused vm_map_offset_t *addr,
2894 __unused vm_map_size_t size,
2895 __unused int flags)
2896 {
2897 return KERN_FAILURE;
2898 }
2899 #endif /* VM_CPM */
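/*
 * Illustrative sketch (not from the original source): requesting a
 * physically contiguous allocation.  Unless the kernel is built with
 * VM_CPM this resolves to the stub above and returns KERN_FAILURE, and
 * even with VM_CPM it may fail if contiguous memory cannot be found.
 * Memory obtained this way is released with vm_deallocate(), as noted
 * in the comment block preceding vm_map_enter_cpm().
 */
#if 0	/* example only */
static kern_return_t
example_contiguous_alloc(vm_map_t map, vm_map_size_t size, vm_map_offset_t *addrp)
{
	*addrp = 0;
	return vm_map_enter_cpm(map, addrp, size, VM_FLAGS_ANYWHERE);
}
#endif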
2900
2901 /* Not used without nested pmaps */
2902 #ifndef NO_NESTED_PMAP
2903 /*
2904 * Clip and unnest a portion of a nested submap mapping.
2905 */
2906
2907
2908 static void
2909 vm_map_clip_unnest(
2910 vm_map_t map,
2911 vm_map_entry_t entry,
2912 vm_map_offset_t start_unnest,
2913 vm_map_offset_t end_unnest)
2914 {
2915 vm_map_offset_t old_start_unnest = start_unnest;
2916 vm_map_offset_t old_end_unnest = end_unnest;
2917
2918 assert(entry->is_sub_map);
2919 assert(entry->object.sub_map != NULL);
2920
2921 /*
2922 * Query the platform for the optimal unnest range.
2923 * DRK: There's some duplication of effort here, since
2924 * callers may have adjusted the range to some extent. This
2925 * routine was introduced to support 1GiB subtree nesting
2926 * for x86 platforms, which can also nest on 2MiB boundaries
2927 * depending on size/alignment.
2928 */
2929 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
2930 log_unnest_badness(map, old_start_unnest, old_end_unnest);
2931 }
2932
2933 if (entry->vme_start > start_unnest ||
2934 entry->vme_end < end_unnest) {
2935 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
2936 "bad nested entry: start=0x%llx end=0x%llx\n",
2937 (long long)start_unnest, (long long)end_unnest,
2938 (long long)entry->vme_start, (long long)entry->vme_end);
2939 }
2940
2941 if (start_unnest > entry->vme_start) {
2942 _vm_map_clip_start(&map->hdr,
2943 entry,
2944 start_unnest);
2945 vm_map_store_update_first_free(map, map->first_free);
2946 }
2947 if (entry->vme_end > end_unnest) {
2948 _vm_map_clip_end(&map->hdr,
2949 entry,
2950 end_unnest);
2951 vm_map_store_update_first_free(map, map->first_free);
2952 }
2953
2954 pmap_unnest(map->pmap,
2955 entry->vme_start,
2956 entry->vme_end - entry->vme_start);
2957 if ((map->mapped) && (map->ref_count)) {
2958 /* clean up parent map/maps */
2959 vm_map_submap_pmap_clean(
2960 map, entry->vme_start,
2961 entry->vme_end,
2962 entry->object.sub_map,
2963 entry->offset);
2964 }
2965 entry->use_pmap = FALSE;
2966 }
2967 #endif /* NO_NESTED_PMAP */
2968
2969 /*
2970 * vm_map_clip_start: [ internal use only ]
2971 *
2972 * Asserts that the given entry begins at or after
2973 * the specified address; if necessary,
2974 * it splits the entry into two.
2975 */
2976 void
2977 vm_map_clip_start(
2978 vm_map_t map,
2979 vm_map_entry_t entry,
2980 vm_map_offset_t startaddr)
2981 {
2982 #ifndef NO_NESTED_PMAP
2983 if (entry->use_pmap &&
2984 startaddr >= entry->vme_start) {
2985 vm_map_offset_t start_unnest, end_unnest;
2986
2987 /*
2988 * Make sure "startaddr" is no longer in a nested range
2989 * before we clip. Unnest only the minimum range the platform
2990 * can handle.
2991 * vm_map_clip_unnest may perform additional adjustments to
2992 * the unnest range.
2993 */
2994 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
2995 end_unnest = start_unnest + pmap_nesting_size_min;
2996 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
2997 }
2998 #endif /* NO_NESTED_PMAP */
2999 if (startaddr > entry->vme_start) {
3000 if (entry->object.vm_object &&
3001 !entry->is_sub_map &&
3002 entry->object.vm_object->phys_contiguous) {
3003 pmap_remove(map->pmap,
3004 (addr64_t)(entry->vme_start),
3005 (addr64_t)(entry->vme_end));
3006 }
3007 _vm_map_clip_start(&map->hdr, entry, startaddr);
3008 vm_map_store_update_first_free(map, map->first_free);
3009 }
3010 }
3011
3012
3013 #define vm_map_copy_clip_start(copy, entry, startaddr) \
3014 MACRO_BEGIN \
3015 if ((startaddr) > (entry)->vme_start) \
3016 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
3017 MACRO_END
3018
3019 /*
3020 * This routine is called only when it is known that
3021 * the entry must be split.
3022 */
3023 static void
3024 _vm_map_clip_start(
3025 register struct vm_map_header *map_header,
3026 register vm_map_entry_t entry,
3027 register vm_map_offset_t start)
3028 {
3029 register vm_map_entry_t new_entry;
3030
3031 /*
3032 * Split off the front portion --
3033 * note that we must insert the new
3034 * entry BEFORE this one, so that
3035 * this entry has the specified starting
3036 * address.
3037 */
3038
3039 new_entry = _vm_map_entry_create(map_header);
3040 vm_map_entry_copy_full(new_entry, entry);
3041
3042 new_entry->vme_end = start;
3043 assert(new_entry->vme_start < new_entry->vme_end);
3044 entry->offset += (start - entry->vme_start);
3045 assert(start < entry->vme_end);
3046 entry->vme_start = start;
3047
3048 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
3049
3050 if (entry->is_sub_map)
3051 vm_map_reference(new_entry->object.sub_map);
3052 else
3053 vm_object_reference(new_entry->object.vm_object);
3054 }
3055
3056
3057 /*
3058 * vm_map_clip_end: [ internal use only ]
3059 *
3060 * Asserts that the given entry ends at or before
3061 * the specified address; if necessary,
3062 * it splits the entry into two.
3063 */
3064 void
3065 vm_map_clip_end(
3066 vm_map_t map,
3067 vm_map_entry_t entry,
3068 vm_map_offset_t endaddr)
3069 {
3070 if (endaddr > entry->vme_end) {
3071 /*
3072 * Within the scope of this clipping, limit "endaddr" to
3073 * the end of this map entry...
3074 */
3075 endaddr = entry->vme_end;
3076 }
3077 #ifndef NO_NESTED_PMAP
3078 if (entry->use_pmap) {
3079 vm_map_offset_t start_unnest, end_unnest;
3080
3081 /*
3082 * Make sure the range between the start of this entry and
3083 * the new "endaddr" is no longer nested before we clip.
3084 * Unnest only the minimum range the platform can handle.
3085 * vm_map_clip_unnest may perform additional adjustments to
3086 * the unnest range.
3087 */
3088 start_unnest = entry->vme_start;
3089 end_unnest =
3090 (endaddr + pmap_nesting_size_min - 1) &
3091 ~(pmap_nesting_size_min - 1);
3092 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
3093 }
3094 #endif /* NO_NESTED_PMAP */
3095 if (endaddr < entry->vme_end) {
3096 if (entry->object.vm_object &&
3097 !entry->is_sub_map &&
3098 entry->object.vm_object->phys_contiguous) {
3099 pmap_remove(map->pmap,
3100 (addr64_t)(entry->vme_start),
3101 (addr64_t)(entry->vme_end));
3102 }
3103 _vm_map_clip_end(&map->hdr, entry, endaddr);
3104 vm_map_store_update_first_free(map, map->first_free);
3105 }
3106 }
3107
3108
3109 #define vm_map_copy_clip_end(copy, entry, endaddr) \
3110 MACRO_BEGIN \
3111 if ((endaddr) < (entry)->vme_end) \
3112 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
3113 MACRO_END
3114
3115 /*
3116 * This routine is called only when it is known that
3117 * the entry must be split.
3118 */
3119 static void
3120 _vm_map_clip_end(
3121 register struct vm_map_header *map_header,
3122 register vm_map_entry_t entry,
3123 register vm_map_offset_t end)
3124 {
3125 register vm_map_entry_t new_entry;
3126
3127 /*
3128 * Create a new entry and insert it
3129 * AFTER the specified entry
3130 */
3131
3132 new_entry = _vm_map_entry_create(map_header);
3133 vm_map_entry_copy_full(new_entry, entry);
3134
3135 assert(entry->vme_start < end);
3136 new_entry->vme_start = entry->vme_end = end;
3137 new_entry->offset += (end - entry->vme_start);
3138 assert(new_entry->vme_start < new_entry->vme_end);
3139
3140 _vm_map_store_entry_link(map_header, entry, new_entry);
3141
3142 if (entry->is_sub_map)
3143 vm_map_reference(new_entry->object.sub_map);
3144 else
3145 vm_object_reference(new_entry->object.vm_object);
3146 }
3147
3148
3149 /*
3150 * VM_MAP_RANGE_CHECK: [ internal use only ]
3151 *
3152 * Asserts that the starting and ending region
3153 * addresses fall within the valid range of the map.
3154 */
3155 #define VM_MAP_RANGE_CHECK(map, start, end) \
3156 MACRO_BEGIN \
3157 if (start < vm_map_min(map)) \
3158 start = vm_map_min(map); \
3159 if (end > vm_map_max(map)) \
3160 end = vm_map_max(map); \
3161 if (start > end) \
3162 start = end; \
3163 MACRO_END
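/*
 * Worked example (illustrative values only): with vm_map_min(map) == 0x1000
 * and vm_map_max(map) == 0x9000, a request for [0x0, 0xA000) is clamped to
 * [0x1000, 0x9000); a request lying entirely above the map's maximum
 * collapses to the empty range [vm_map_max(map), vm_map_max(map)).
 */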
3164
3165 /*
3166 * vm_map_range_check: [ internal use only ]
3167 *
3168 * Check that the region defined by the specified start and
3169 * end addresses is wholly contained within a single map
3170 * entry or set of adjacent map entries of the specified map,
3171 * i.e. the specified region contains no unmapped space.
3172 * If any or all of the region is unmapped, FALSE is returned.
3173 * Otherwise, TRUE is returned and if the output argument 'entry'
3174 * is not NULL it points to the map entry containing the start
3175 * of the region.
3176 *
3177 * The map is locked for reading on entry and is left locked.
3178 */
3179 static boolean_t
3180 vm_map_range_check(
3181 register vm_map_t map,
3182 register vm_map_offset_t start,
3183 register vm_map_offset_t end,
3184 vm_map_entry_t *entry)
3185 {
3186 vm_map_entry_t cur;
3187 register vm_map_offset_t prev;
3188
3189 /*
3190 * Basic sanity checks first
3191 */
3192 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
3193 return (FALSE);
3194
3195 /*
3196 * Check first if the region starts within a valid
3197 * mapping for the map.
3198 */
3199 if (!vm_map_lookup_entry(map, start, &cur))
3200 return (FALSE);
3201
3202 /*
3203 * Optimize for the case that the region is contained
3204 * in a single map entry.
3205 */
3206 if (entry != (vm_map_entry_t *) NULL)
3207 *entry = cur;
3208 if (end <= cur->vme_end)
3209 return (TRUE);
3210
3211 /*
3212 * If the region is not wholly contained within a
3213 * single entry, walk the entries looking for holes.
3214 */
3215 prev = cur->vme_end;
3216 cur = cur->vme_next;
3217 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
3218 if (end <= cur->vme_end)
3219 return (TRUE);
3220 prev = cur->vme_end;
3221 cur = cur->vme_next;
3222 }
3223 return (FALSE);
3224 }
3225
3226 /*
3227 * vm_map_submap: [ kernel use only ]
3228 *
3229 * Mark the given range as handled by a subordinate map.
3230 *
3231 * This range must have been created with vm_map_find using
3232 * the vm_submap_object, and no other operations may have been
3233 * performed on this range prior to calling vm_map_submap.
3234 *
3235 * Only a limited number of operations can be performed
3236 * within this range after calling vm_map_submap:
3237 * vm_fault
3238 * [Don't try vm_map_copyin!]
3239 *
3240 * To remove a submapping, one must first remove the
3241 * range from the superior map, and then destroy the
3242 * submap (if desired). [Better yet, don't try it.]
3243 */
3244 kern_return_t
3245 vm_map_submap(
3246 vm_map_t map,
3247 vm_map_offset_t start,
3248 vm_map_offset_t end,
3249 vm_map_t submap,
3250 vm_map_offset_t offset,
3251 #ifdef NO_NESTED_PMAP
3252 __unused
3253 #endif /* NO_NESTED_PMAP */
3254 boolean_t use_pmap)
3255 {
3256 vm_map_entry_t entry;
3257 register kern_return_t result = KERN_INVALID_ARGUMENT;
3258 register vm_object_t object;
3259
3260 vm_map_lock(map);
3261
3262 if (! vm_map_lookup_entry(map, start, &entry)) {
3263 entry = entry->vme_next;
3264 }
3265
3266 if (entry == vm_map_to_entry(map) ||
3267 entry->is_sub_map) {
3268 vm_map_unlock(map);
3269 return KERN_INVALID_ARGUMENT;
3270 }
3271
3272 assert(!entry->use_pmap); /* we don't want to unnest anything here */
3273 vm_map_clip_start(map, entry, start);
3274 vm_map_clip_end(map, entry, end);
3275
3276 if ((entry->vme_start == start) && (entry->vme_end == end) &&
3277 (!entry->is_sub_map) &&
3278 ((object = entry->object.vm_object) == vm_submap_object) &&
3279 (object->resident_page_count == 0) &&
3280 (object->copy == VM_OBJECT_NULL) &&
3281 (object->shadow == VM_OBJECT_NULL) &&
3282 (!object->pager_created)) {
3283 entry->offset = (vm_object_offset_t)offset;
3284 entry->object.vm_object = VM_OBJECT_NULL;
3285 vm_object_deallocate(object);
3286 entry->is_sub_map = TRUE;
3287 entry->object.sub_map = submap;
3288 vm_map_reference(submap);
3289 submap->mapped = TRUE;
3290
3291 #ifndef NO_NESTED_PMAP
3292 if (use_pmap) {
3293 /* nest if platform code will allow */
3294 if(submap->pmap == NULL) {
3295 submap->pmap = pmap_create((vm_map_size_t) 0, FALSE);
3296 if(submap->pmap == PMAP_NULL) {
3297 vm_map_unlock(map);
3298 return(KERN_NO_SPACE);
3299 }
3300 }
3301 result = pmap_nest(map->pmap,
3302 (entry->object.sub_map)->pmap,
3303 (addr64_t)start,
3304 (addr64_t)start,
3305 (uint64_t)(end - start));
3306 if(result)
3307 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
3308 entry->use_pmap = TRUE;
3309 }
3310 #else /* NO_NESTED_PMAP */
3311 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
3312 #endif /* NO_NESTED_PMAP */
3313 result = KERN_SUCCESS;
3314 }
3315 vm_map_unlock(map);
3316
3317 return(result);
3318 }
3319
3320 /*
3321 * vm_map_protect:
3322 *
3323 * Sets the protection of the specified address
3324 * region in the target map. If "set_max" is
3325 * specified, the maximum protection is to be set;
3326 * otherwise, only the current protection is affected.
3327 */
3328 kern_return_t
3329 vm_map_protect(
3330 register vm_map_t map,
3331 register vm_map_offset_t start,
3332 register vm_map_offset_t end,
3333 register vm_prot_t new_prot,
3334 register boolean_t set_max)
3335 {
3336 register vm_map_entry_t current;
3337 register vm_map_offset_t prev;
3338 vm_map_entry_t entry;
3339 vm_prot_t new_max;
3340
3341 XPR(XPR_VM_MAP,
3342 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
3343 map, start, end, new_prot, set_max);
3344
3345 vm_map_lock(map);
3346
3347 /* LP64todo - remove this check when vm_map_commpage64()
3348 * no longer has to stuff in a map_entry for the commpage
3349 * above the map's max_offset.
3350 */
3351 if (start >= map->max_offset) {
3352 vm_map_unlock(map);
3353 return(KERN_INVALID_ADDRESS);
3354 }
3355
3356 while(1) {
3357 /*
3358 * Lookup the entry. If it doesn't start in a valid
3359 * entry, return an error.
3360 */
3361 if (! vm_map_lookup_entry(map, start, &entry)) {
3362 vm_map_unlock(map);
3363 return(KERN_INVALID_ADDRESS);
3364 }
3365
3366 if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
3367 start = SUPERPAGE_ROUND_DOWN(start);
3368 continue;
3369 }
3370 break;
3371 }
3372 if (entry->superpage_size)
3373 end = SUPERPAGE_ROUND_UP(end);
3374
3375 /*
3376 * Make a first pass to check for protection and address
3377 * violations.
3378 */
3379
3380 current = entry;
3381 prev = current->vme_start;
3382 while ((current != vm_map_to_entry(map)) &&
3383 (current->vme_start < end)) {
3384
3385 /*
3386 * If there is a hole, return an error.
3387 */
3388 if (current->vme_start != prev) {
3389 vm_map_unlock(map);
3390 return(KERN_INVALID_ADDRESS);
3391 }
3392
3393 new_max = current->max_protection;
3394 if(new_prot & VM_PROT_COPY) {
3395 new_max |= VM_PROT_WRITE;
3396 if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
3397 vm_map_unlock(map);
3398 return(KERN_PROTECTION_FAILURE);
3399 }
3400 } else {
3401 if ((new_prot & new_max) != new_prot) {
3402 vm_map_unlock(map);
3403 return(KERN_PROTECTION_FAILURE);
3404 }
3405 }
3406
3407 #if CONFIG_EMBEDDED
3408 if (new_prot & VM_PROT_WRITE) {
3409 if ((new_prot & VM_PROT_EXECUTE) && !(current->used_for_jit)) {
3410 printf("EMBEDDED: %s can't have both write and exec at the same time\n", __FUNCTION__);
3411 new_prot &= ~VM_PROT_EXECUTE;
3412 }
3413 }
3414 #endif
3415
3416 prev = current->vme_end;
3417 current = current->vme_next;
3418 }
3419 if (end > prev) {
3420 vm_map_unlock(map);
3421 return(KERN_INVALID_ADDRESS);
3422 }
3423
3424 /*
3425 * Go back and fix up protections.
3426 * Clip to start here if the range starts within
3427 * the entry.
3428 */
3429
3430 current = entry;
3431 if (current != vm_map_to_entry(map)) {
3432 /* clip and unnest if necessary */
3433 vm_map_clip_start(map, current, start);
3434 }
3435
3436 while ((current != vm_map_to_entry(map)) &&
3437 (current->vme_start < end)) {
3438
3439 vm_prot_t old_prot;
3440
3441 vm_map_clip_end(map, current, end);
3442
3443 assert(!current->use_pmap); /* clipping did unnest if needed */
3444
3445 old_prot = current->protection;
3446
3447 if(new_prot & VM_PROT_COPY) {
3448 /* caller is asking specifically to copy the */
3449 /* mapped data; this implies that max protection */
3450 /* will include write. Caller must be prepared */
3451 /* for loss of shared memory communication in the */
3452 /* target area after taking this step */
3453
3454 if (current->is_sub_map == FALSE && current->object.vm_object == VM_OBJECT_NULL){
3455 current->object.vm_object = vm_object_allocate((vm_map_size_t)(current->vme_end - current->vme_start));
3456 current->offset = 0;
3457 }
3458 current->needs_copy = TRUE;
3459 current->max_protection |= VM_PROT_WRITE;
3460 }
3461
3462 if (set_max)
3463 current->protection =
3464 (current->max_protection =
3465 new_prot & ~VM_PROT_COPY) &
3466 old_prot;
3467 else
3468 current->protection = new_prot & ~VM_PROT_COPY;
3469
3470 /*
3471 * Update physical map if necessary.
3472 * If the request is to turn off write protection,
3473 * we won't do it for real (in pmap). This is because
3474 * it would cause copy-on-write to fail. We've already
3475 * set the new protection in the map, so if a
3476 * write-protect fault occurs, it will be fixed up
3477 * properly, COW or not.
3478 */
3479 if (current->protection != old_prot) {
3480 /* Look one level in: we support nested pmaps */
3481 /* from mapped submaps which are direct entries */
3482 /* in our map */
3483
3484 vm_prot_t prot;
3485
3486 prot = current->protection & ~VM_PROT_WRITE;
3487
3488 if (override_nx(map, current->alias) && prot)
3489 prot |= VM_PROT_EXECUTE;
3490
3491 if (current->is_sub_map && current->use_pmap) {
3492 pmap_protect(current->object.sub_map->pmap,
3493 current->vme_start,
3494 current->vme_end,
3495 prot);
3496 } else {
3497 pmap_protect(map->pmap,
3498 current->vme_start,
3499 current->vme_end,
3500 prot);
3501 }
3502 }
3503 current = current->vme_next;
3504 }
3505
3506 current = entry;
3507 while ((current != vm_map_to_entry(map)) &&
3508 (current->vme_start <= end)) {
3509 vm_map_simplify_entry(map, current);
3510 current = current->vme_next;
3511 }
3512
3513 vm_map_unlock(map);
3514 return(KERN_SUCCESS);
3515 }
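/*
 * Illustrative sketch (not from the original source): downgrading the
 * current protection of a page-aligned range to read-only while leaving
 * the maximum protection untouched (set_max == FALSE).  "map", "start"
 * and "end" are assumed to describe an existing, hole-free mapping.
 */
#if 0	/* example only */
static kern_return_t
example_make_readonly(vm_map_t map, vm_map_offset_t start, vm_map_offset_t end)
{
	return vm_map_protect(map, start, end, VM_PROT_READ, FALSE);
}
#endif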
3516
3517 /*
3518 * vm_map_inherit:
3519 *
3520 * Sets the inheritance of the specified address
3521 * range in the target map. Inheritance
3522 * affects how the map will be shared with
3523 * child maps at the time of vm_map_fork.
3524 */
3525 kern_return_t
3526 vm_map_inherit(
3527 register vm_map_t map,
3528 register vm_map_offset_t start,
3529 register vm_map_offset_t end,
3530 register vm_inherit_t new_inheritance)
3531 {
3532 register vm_map_entry_t entry;
3533 vm_map_entry_t temp_entry;
3534
3535 vm_map_lock(map);
3536
3537 VM_MAP_RANGE_CHECK(map, start, end);
3538
3539 if (vm_map_lookup_entry(map, start, &temp_entry)) {
3540 entry = temp_entry;
3541 }
3542 else {
3543 temp_entry = temp_entry->vme_next;
3544 entry = temp_entry;
3545 }
3546
3547 /* first check entire range for submaps which can't support the */
3548 /* given inheritance. */
3549 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3550 if(entry->is_sub_map) {
3551 if(new_inheritance == VM_INHERIT_COPY) {
3552 vm_map_unlock(map);
3553 return(KERN_INVALID_ARGUMENT);
3554 }
3555 }
3556
3557 entry = entry->vme_next;
3558 }
3559
3560 entry = temp_entry;
3561 if (entry != vm_map_to_entry(map)) {
3562 /* clip and unnest if necessary */
3563 vm_map_clip_start(map, entry, start);
3564 }
3565
3566 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3567 vm_map_clip_end(map, entry, end);
3568 assert(!entry->use_pmap); /* clip did unnest if needed */
3569
3570 entry->inheritance = new_inheritance;
3571
3572 entry = entry->vme_next;
3573 }
3574
3575 vm_map_unlock(map);
3576 return(KERN_SUCCESS);
3577 }
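/*
 * Illustrative sketch (not from the original source): marking a range so
 * that it is not propagated to child maps at vm_map_fork() time.  Note
 * the check above: submap entries reject VM_INHERIT_COPY, but
 * VM_INHERIT_NONE and VM_INHERIT_SHARE are accepted for any entry.
 */
#if 0	/* example only */
static kern_return_t
example_dont_inherit(vm_map_t map, vm_map_offset_t start, vm_map_offset_t end)
{
	return vm_map_inherit(map, start, end, VM_INHERIT_NONE);
}
#endif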
3578
3579 /*
3580 * Update the accounting for the amount of wired memory in this map. If the user has
3581 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
3582 */
3583
3584 static kern_return_t
3585 add_wire_counts(
3586 vm_map_t map,
3587 vm_map_entry_t entry,
3588 boolean_t user_wire)
3589 {
3590 vm_map_size_t size;
3591
3592 if (user_wire) {
3593 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
3594
3595 /*
3596 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
3597 * this map entry.
3598 */
3599
3600 if (entry->user_wired_count == 0) {
3601 size = entry->vme_end - entry->vme_start;
3602
3603 /*
3604 * Since this is the first time the user is wiring this map entry, check to see if we're
3605 * exceeding the user wire limits. There is a per map limit which is the smaller of either
3606 * the process's rlimit or the global vm_user_wire_limit which caps this value. There is also
3607 * a system-wide limit on the amount of memory all users can wire. If the user is over either
3608 * limit, then we fail.
3609 */
3610
3611 if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
3612 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
3613 size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
3614 return KERN_RESOURCE_SHORTAGE;
3615
3616 /*
3617 * The first time the user wires an entry, we also increment the wired_count and add this to
3618 * the total that has been wired in the map.
3619 */
3620
3621 if (entry->wired_count >= MAX_WIRE_COUNT)
3622 return KERN_FAILURE;
3623
3624 entry->wired_count++;
3625 map->user_wire_size += size;
3626 }
3627
3628 if (entry->user_wired_count >= MAX_WIRE_COUNT)
3629 return KERN_FAILURE;
3630
3631 entry->user_wired_count++;
3632
3633 } else {
3634
3635 /*
3636 * The kernel's wiring the memory. Just bump the count and continue.
3637 */
3638
3639 if (entry->wired_count >= MAX_WIRE_COUNT)
3640 panic("vm_map_wire: too many wirings");
3641
3642 entry->wired_count++;
3643 }
3644
3645 return KERN_SUCCESS;
3646 }
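/*
 * Illustrative sketch (not from the original source) of the user-wire
 * limit test performed above, with the three constraints spelled out
 * separately: the per-map cap (the smaller of the map's own limit and
 * the global vm_user_wire_limit), the system-wide user cap, and the
 * amount of physical memory that must always remain unwired.
 */
#if 0	/* example only */
static boolean_t
example_user_wire_fits(vm_map_t map, vm_map_size_t size, unsigned int total_wire_count)
{
	if (size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit))
		return FALSE;	/* per-map limit exceeded */
	if (size + ptoa_64(total_wire_count) > vm_global_user_wire_limit)
		return FALSE;	/* system-wide user wiring limit exceeded */
	if (size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
		return FALSE;	/* would leave too little unwired memory */
	return TRUE;
}
#endif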
3647
3648 /*
3649 * Update the memory wiring accounting now that the given map entry is being unwired.
3650 */
3651
3652 static void
3653 subtract_wire_counts(
3654 vm_map_t map,
3655 vm_map_entry_t entry,
3656 boolean_t user_wire)
3657 {
3658
3659 if (user_wire) {
3660
3661 /*
3662 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
3663 */
3664
3665 if (entry->user_wired_count == 1) {
3666
3667 /*
3668 * We're removing the last user wire reference. Decrement the wired_count and the total
3669 * user wired memory for this map.
3670 */
3671
3672 assert(entry->wired_count >= 1);
3673 entry->wired_count--;
3674 map->user_wire_size -= entry->vme_end - entry->vme_start;
3675 }
3676
3677 assert(entry->user_wired_count >= 1);
3678 entry->user_wired_count--;
3679
3680 } else {
3681
3682 /*
3683 * The kernel is unwiring the memory. Just update the count.
3684 */
3685
3686 assert(entry->wired_count >= 1);
3687 entry->wired_count--;
3688 }
3689 }
3690
3691 /*
3692 * vm_map_wire:
3693 *
3694 * Sets the pageability of the specified address range in the
3695 * target map as wired. Regions specified as not pageable require
3696 * locked-down physical memory and physical page maps. The
3697 * access_type variable indicates types of accesses that must not
3698 * generate page faults. This is checked against protection of
3699 * memory being locked-down.
3700 *
3701 * The map must not be locked, but a reference must remain to the
3702 * map throughout the call.
3703 */
3704 static kern_return_t
3705 vm_map_wire_nested(
3706 register vm_map_t map,
3707 register vm_map_offset_t start,
3708 register vm_map_offset_t end,
3709 register vm_prot_t access_type,
3710 boolean_t user_wire,
3711 pmap_t map_pmap,
3712 vm_map_offset_t pmap_addr)
3713 {
3714 register vm_map_entry_t entry;
3715 struct vm_map_entry *first_entry, tmp_entry;
3716 vm_map_t real_map;
3717 register vm_map_offset_t s,e;
3718 kern_return_t rc;
3719 boolean_t need_wakeup;
3720 boolean_t main_map = FALSE;
3721 wait_interrupt_t interruptible_state;
3722 thread_t cur_thread;
3723 unsigned int last_timestamp;
3724 vm_map_size_t size;
3725
3726 vm_map_lock(map);
3727 if(map_pmap == NULL)
3728 main_map = TRUE;
3729 last_timestamp = map->timestamp;
3730
3731 VM_MAP_RANGE_CHECK(map, start, end);
3732 assert(page_aligned(start));
3733 assert(page_aligned(end));
3734 if (start == end) {
3735 /* We wired what the caller asked for, zero pages */
3736 vm_map_unlock(map);
3737 return KERN_SUCCESS;
3738 }
3739
3740 need_wakeup = FALSE;
3741 cur_thread = current_thread();
3742
3743 s = start;
3744 rc = KERN_SUCCESS;
3745
3746 if (vm_map_lookup_entry(map, s, &first_entry)) {
3747 entry = first_entry;
3748 /*
3749 * vm_map_clip_start will be done later.
3750 * We don't want to unnest any nested submaps here !
3751 */
3752 } else {
3753 /* Start address is not in map */
3754 rc = KERN_INVALID_ADDRESS;
3755 goto done;
3756 }
3757
3758 while ((entry != vm_map_to_entry(map)) && (s < end)) {
3759 /*
3760 * At this point, we have wired from "start" to "s".
3761 * We still need to wire from "s" to "end".
3762 *
3763 * "entry" hasn't been clipped, so it could start before "s"
3764 * and/or end after "end".
3765 */
3766
3767 /* "e" is how far we want to wire in this entry */
3768 e = entry->vme_end;
3769 if (e > end)
3770 e = end;
3771
3772 /*
3773 * If another thread is wiring/unwiring this entry then
3774 * block after informing other thread to wake us up.
3775 */
3776 if (entry->in_transition) {
3777 wait_result_t wait_result;
3778
3779 /*
3780 * We have not clipped the entry. Make sure that
3781 * the start address is in range so that the lookup
3782 * below will succeed.
3783 * "s" is the current starting point: we've already
3784 * wired from "start" to "s" and we still have
3785 * to wire from "s" to "end".
3786 */
3787
3788 entry->needs_wakeup = TRUE;
3789
3790 /*
3791 * wake up anybody waiting on entries that we have
3792 * already wired.
3793 */
3794 if (need_wakeup) {
3795 vm_map_entry_wakeup(map);
3796 need_wakeup = FALSE;
3797 }
3798 /*
3799 * User wiring is interruptible
3800 */
3801 wait_result = vm_map_entry_wait(map,
3802 (user_wire) ? THREAD_ABORTSAFE :
3803 THREAD_UNINT);
3804 if (user_wire && wait_result == THREAD_INTERRUPTED) {
3805 /*
3806 * undo the wirings we have done so far
3807 * We do not clear the needs_wakeup flag,
3808 * because we cannot tell if we were the
3809 * only one waiting.
3810 */
3811 rc = KERN_FAILURE;
3812 goto done;
3813 }
3814
3815 /*
3816 * Cannot avoid a lookup here. reset timestamp.
3817 */
3818 last_timestamp = map->timestamp;
3819
3820 /*
3821 * The entry could have been clipped, look it up again.
3822 * Worst that can happen is that it may not exist anymore.
3823 */
3824 if (!vm_map_lookup_entry(map, s, &first_entry)) {
3825 /*
3826 * User: undo everything up to the previous
3827 * entry. Let vm_map_unwire worry about
3828 * checking the validity of the range.
3829 */
3830 rc = KERN_FAILURE;
3831 goto done;
3832 }
3833 entry = first_entry;
3834 continue;
3835 }
3836
3837 if (entry->is_sub_map) {
3838 vm_map_offset_t sub_start;
3839 vm_map_offset_t sub_end;
3840 vm_map_offset_t local_start;
3841 vm_map_offset_t local_end;
3842 pmap_t pmap;
3843
3844 vm_map_clip_start(map, entry, s);
3845 vm_map_clip_end(map, entry, end);
3846
3847 sub_start = entry->offset;
3848 sub_end = entry->vme_end;
3849 sub_end += entry->offset - entry->vme_start;
3850
3851 local_end = entry->vme_end;
3852 if(map_pmap == NULL) {
3853 vm_object_t object;
3854 vm_object_offset_t offset;
3855 vm_prot_t prot;
3856 boolean_t wired;
3857 vm_map_entry_t local_entry;
3858 vm_map_version_t version;
3859 vm_map_t lookup_map;
3860
3861 if(entry->use_pmap) {
3862 pmap = entry->object.sub_map->pmap;
3863 /* ppc implementation requires that */
3864 /* submap's pmap address ranges line */
3865 /* up with parent map */
3866 #ifdef notdef
3867 pmap_addr = sub_start;
3868 #endif
3869 pmap_addr = s;
3870 } else {
3871 pmap = map->pmap;
3872 pmap_addr = s;
3873 }
3874
3875 if (entry->wired_count) {
3876 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
3877 goto done;
3878
3879 /*
3880 * The map was not unlocked:
3881 * no need to goto re-lookup.
3882 * Just go directly to next entry.
3883 */
3884 entry = entry->vme_next;
3885 s = entry->vme_start;
3886 continue;
3887
3888 }
3889
3890 /* call vm_map_lookup_locked to */
3891 /* cause any needs_copy to be */
3892 /* evaluated */
3893 local_start = entry->vme_start;
3894 lookup_map = map;
3895 vm_map_lock_write_to_read(map);
3896 if(vm_map_lookup_locked(
3897 &lookup_map, local_start,
3898 access_type,
3899 OBJECT_LOCK_EXCLUSIVE,
3900 &version, &object,
3901 &offset, &prot, &wired,
3902 NULL,
3903 &real_map)) {
3904
3905 vm_map_unlock_read(lookup_map);
3906 vm_map_unwire(map, start,
3907 s, user_wire);
3908 return(KERN_FAILURE);
3909 }
3910 if(real_map != lookup_map)
3911 vm_map_unlock(real_map);
3912 vm_map_unlock_read(lookup_map);
3913 vm_map_lock(map);
3914 vm_object_unlock(object);
3915
3916 /* we unlocked, so must re-lookup */
3917 if (!vm_map_lookup_entry(map,
3918 local_start,
3919 &local_entry)) {
3920 rc = KERN_FAILURE;
3921 goto done;
3922 }
3923
3924 /*
3925 * entry could have been "simplified",
3926 * so re-clip
3927 */
3928 entry = local_entry;
3929 assert(s == local_start);
3930 vm_map_clip_start(map, entry, s);
3931 vm_map_clip_end(map, entry, end);
3932 /* re-compute "e" */
3933 e = entry->vme_end;
3934 if (e > end)
3935 e = end;
3936
3937 /* did we have a change of type? */
3938 if (!entry->is_sub_map) {
3939 last_timestamp = map->timestamp;
3940 continue;
3941 }
3942 } else {
3943 local_start = entry->vme_start;
3944 pmap = map_pmap;
3945 }
3946
3947 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
3948 goto done;
3949
3950 entry->in_transition = TRUE;
3951
3952 vm_map_unlock(map);
3953 rc = vm_map_wire_nested(entry->object.sub_map,
3954 sub_start, sub_end,
3955 access_type,
3956 user_wire, pmap, pmap_addr);
3957 vm_map_lock(map);
3958
3959 /*
3960 * Find the entry again. It could have been clipped
3961 * after we unlocked the map.
3962 */
3963 if (!vm_map_lookup_entry(map, local_start,
3964 &first_entry))
3965 panic("vm_map_wire: re-lookup failed");
3966 entry = first_entry;
3967
3968 assert(local_start == s);
3969 /* re-compute "e" */
3970 e = entry->vme_end;
3971 if (e > end)
3972 e = end;
3973
3974 last_timestamp = map->timestamp;
3975 while ((entry != vm_map_to_entry(map)) &&
3976 (entry->vme_start < e)) {
3977 assert(entry->in_transition);
3978 entry->in_transition = FALSE;
3979 if (entry->needs_wakeup) {
3980 entry->needs_wakeup = FALSE;
3981 need_wakeup = TRUE;
3982 }
3983 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
3984 subtract_wire_counts(map, entry, user_wire);
3985 }
3986 entry = entry->vme_next;
3987 }
3988 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
3989 goto done;
3990 }
3991
3992 /* no need to relookup again */
3993 s = entry->vme_start;
3994 continue;
3995 }
3996
3997 /*
3998 * If this entry is already wired then increment
3999 * the appropriate wire reference count.
4000 */
4001 if (entry->wired_count) {
4002 /*
4003 * entry is already wired down, get our reference
4004 * after clipping to our range.
4005 */
4006 vm_map_clip_start(map, entry, s);
4007 vm_map_clip_end(map, entry, end);
4008
4009 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4010 goto done;
4011
4012 /* map was not unlocked: no need to relookup */
4013 entry = entry->vme_next;
4014 s = entry->vme_start;
4015 continue;
4016 }
4017
4018 /*
4019 * Unwired entry or wire request transmitted via submap
4020 */
4021
4022
4023 /*
4024 * Perform actions of vm_map_lookup that need the write
4025 * lock on the map: create a shadow object for a
4026 * copy-on-write region, or an object for a zero-fill
4027 * region.
4028 */
4029 size = entry->vme_end - entry->vme_start;
4030 /*
4031 * If wiring a copy-on-write page, we need to copy it now
4032 * even if we're only (currently) requesting read access.
4033 * This is aggressive, but once it's wired we can't move it.
4034 */
4035 if (entry->needs_copy) {
4036 vm_object_shadow(&entry->object.vm_object,
4037 &entry->offset, size);
4038 entry->needs_copy = FALSE;
4039 } else if (entry->object.vm_object == VM_OBJECT_NULL) {
4040 entry->object.vm_object = vm_object_allocate(size);
4041 entry->offset = (vm_object_offset_t)0;
4042 }
4043
4044 vm_map_clip_start(map, entry, s);
4045 vm_map_clip_end(map, entry, end);
4046
4047 /* re-compute "e" */
4048 e = entry->vme_end;
4049 if (e > end)
4050 e = end;
4051
4052 /*
4053 * Check for holes and protection mismatch.
4054 * Holes: Next entry should be contiguous unless this
4055 * is the end of the region.
4056 * Protection: Access requested must be allowed, unless
4057 * wiring is by protection class
4058 */
4059 if ((entry->vme_end < end) &&
4060 ((entry->vme_next == vm_map_to_entry(map)) ||
4061 (entry->vme_next->vme_start > entry->vme_end))) {
4062 /* found a hole */
4063 rc = KERN_INVALID_ADDRESS;
4064 goto done;
4065 }
4066 if ((entry->protection & access_type) != access_type) {
4067 /* found a protection problem */
4068 rc = KERN_PROTECTION_FAILURE;
4069 goto done;
4070 }
4071
4072 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
4073
4074 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4075 goto done;
4076
4077 entry->in_transition = TRUE;
4078
4079 /*
4080 * This entry might get split once we unlock the map.
4081 * In vm_fault_wire(), we need the current range as
4082 * defined by this entry. In order for this to work
4083 * along with a simultaneous clip operation, we make a
4084 * temporary copy of this entry and use that for the
4085 * wiring. Note that the underlying objects do not
4086 * change during a clip.
4087 */
4088 tmp_entry = *entry;
4089
4090 /*
4091 * The in_transition state guarantees that the entry
4092 * (or entries for this range, if a split occurred) will be
4093 * there when the map lock is acquired for the second time.
4094 */
4095 vm_map_unlock(map);
4096
4097 if (!user_wire && cur_thread != THREAD_NULL)
4098 interruptible_state = thread_interrupt_level(THREAD_UNINT);
4099 else
4100 interruptible_state = THREAD_UNINT;
4101
4102 if(map_pmap)
4103 rc = vm_fault_wire(map,
4104 &tmp_entry, map_pmap, pmap_addr);
4105 else
4106 rc = vm_fault_wire(map,
4107 &tmp_entry, map->pmap,
4108 tmp_entry.vme_start);
4109
4110 if (!user_wire && cur_thread != THREAD_NULL)
4111 thread_interrupt_level(interruptible_state);
4112
4113 vm_map_lock(map);
4114
4115 if (last_timestamp+1 != map->timestamp) {
4116 /*
4117 * Find the entry again. It could have been clipped
4118 * after we unlocked the map.
4119 */
4120 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4121 &first_entry))
4122 panic("vm_map_wire: re-lookup failed");
4123
4124 entry = first_entry;
4125 }
4126
4127 last_timestamp = map->timestamp;
4128
4129 while ((entry != vm_map_to_entry(map)) &&
4130 (entry->vme_start < tmp_entry.vme_end)) {
4131 assert(entry->in_transition);
4132 entry->in_transition = FALSE;
4133 if (entry->needs_wakeup) {
4134 entry->needs_wakeup = FALSE;
4135 need_wakeup = TRUE;
4136 }
4137 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
4138 subtract_wire_counts(map, entry, user_wire);
4139 }
4140 entry = entry->vme_next;
4141 }
4142
4143 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
4144 goto done;
4145 }
4146
4147 s = entry->vme_start;
4148 } /* end while loop through map entries */
4149
4150 done:
4151 if (rc == KERN_SUCCESS) {
4152 /* repair any damage we may have made to the VM map */
4153 vm_map_simplify_range(map, start, end);
4154 }
4155
4156 vm_map_unlock(map);
4157
4158 /*
4159 * wake up anybody waiting on entries we wired.
4160 */
4161 if (need_wakeup)
4162 vm_map_entry_wakeup(map);
4163
4164 if (rc != KERN_SUCCESS) {
4165 /* undo what has been wired so far */
4166 vm_map_unwire(map, start, s, user_wire);
4167 }
4168
4169 return rc;
4170
4171 }
4172
4173 kern_return_t
4174 vm_map_wire(
4175 register vm_map_t map,
4176 register vm_map_offset_t start,
4177 register vm_map_offset_t end,
4178 register vm_prot_t access_type,
4179 boolean_t user_wire)
4180 {
4181
4182 kern_return_t kret;
4183
4184 kret = vm_map_wire_nested(map, start, end, access_type,
4185 user_wire, (pmap_t)NULL, 0);
4186 return kret;
4187 }
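/*
 * Illustrative sketch only (kept out of the build with #if 0): one way a
 * kernel-internal caller might use the vm_map_wire()/vm_map_unwire() pair
 * to keep a page-aligned range resident across an I/O operation.  The
 * function and variable names below are hypothetical, not part of this
 * file's interface.
 */
#if 0
static kern_return_t
example_wire_range_for_io(
	vm_map_t		map,
	vm_map_offset_t		start,
	vm_map_offset_t		end)
{
	kern_return_t		kr;

	/* kernel wiring (user_wire == FALSE): waits are not interruptible */
	kr = vm_map_wire(map, start, end,
			 VM_PROT_READ | VM_PROT_WRITE, FALSE);
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... perform the I/O against the now-resident range ... */

	/* drop the kernel wiring taken above */
	return vm_map_unwire(map, start, end, FALSE);
}
#endif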
4188
4189 /*
4190 * vm_map_unwire:
4191 *
4192 * Sets the pageability of the specified address range in the target
4193 * map to pageable. Regions specified must have been wired previously.
4194 *
4195 * The map must not be locked, but a reference must remain to the map
4196 * throughout the call.
4197 *
4198 * The kernel will panic on failures. User unwire ignores holes and
4199 * unwired and in-transition entries to avoid losing memory by leaving
4200 * it unwired.
4201 */
4202 static kern_return_t
4203 vm_map_unwire_nested(
4204 register vm_map_t map,
4205 register vm_map_offset_t start,
4206 register vm_map_offset_t end,
4207 boolean_t user_wire,
4208 pmap_t map_pmap,
4209 vm_map_offset_t pmap_addr)
4210 {
4211 register vm_map_entry_t entry;
4212 struct vm_map_entry *first_entry, tmp_entry;
4213 boolean_t need_wakeup;
4214 boolean_t main_map = FALSE;
4215 unsigned int last_timestamp;
4216
4217 vm_map_lock(map);
4218 if(map_pmap == NULL)
4219 main_map = TRUE;
4220 last_timestamp = map->timestamp;
4221
4222 VM_MAP_RANGE_CHECK(map, start, end);
4223 assert(page_aligned(start));
4224 assert(page_aligned(end));
4225
4226 if (start == end) {
4227 /* We unwired what the caller asked for: zero pages */
4228 vm_map_unlock(map);
4229 return KERN_SUCCESS;
4230 }
4231
4232 if (vm_map_lookup_entry(map, start, &first_entry)) {
4233 entry = first_entry;
4234 /*
4235 * vm_map_clip_start will be done later.
4236 * We don't want to unnest any nested sub maps here !
4237 */
4238 }
4239 else {
4240 if (!user_wire) {
4241 panic("vm_map_unwire: start not found");
4242 }
4243 /* Start address is not in map. */
4244 vm_map_unlock(map);
4245 return(KERN_INVALID_ADDRESS);
4246 }
4247
4248 if (entry->superpage_size) {
4249 /* superpages are always wired */
4250 vm_map_unlock(map);
4251 return KERN_INVALID_ADDRESS;
4252 }
4253
4254 need_wakeup = FALSE;
4255 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4256 if (entry->in_transition) {
4257 /*
4258 * 1)
4259 * Another thread is wiring down this entry. Note
4260 * that if it were not for the other thread, we would
4261 * be unwiring an unwired entry. This is not
4262 * permitted. If we wait, we will be unwiring memory
4263 * we did not wire.
4264 *
4265 * 2)
4266 * Another thread is unwiring this entry. We did not
4267 * have a reference to it, because if we did, this
4268 * entry would not be getting unwired now.
4269 */
4270 if (!user_wire) {
4271 /*
4272 * XXX FBDP
4273 * This could happen: there could be some
4274 * overlapping vslock/vsunlock operations
4275 * going on.
4276 * We should probably just wait and retry,
4277 * but then we have to be careful that this
4278 * entry could get "simplified" after
4279 * "in_transition" gets unset and before
4280 * we re-lookup the entry, so we would
4281 * have to re-clip the entry to avoid
4282 * re-unwiring what we have already unwired...
4283 * See vm_map_wire_nested().
4284 *
4285 * Or we could just ignore "in_transition"
4286 * here and proceed to decrement the wired
4287 * count(s) on this entry. That should be fine
4288 * as long as "wired_count" doesn't drop all
4289 * the way to 0 (and we should panic if THAT
4290 * happens).
4291 */
4292 panic("vm_map_unwire: in_transition entry");
4293 }
4294
4295 entry = entry->vme_next;
4296 continue;
4297 }
4298
4299 if (entry->is_sub_map) {
4300 vm_map_offset_t sub_start;
4301 vm_map_offset_t sub_end;
4302 vm_map_offset_t local_end;
4303 pmap_t pmap;
4304
4305 vm_map_clip_start(map, entry, start);
4306 vm_map_clip_end(map, entry, end);
4307
4308 sub_start = entry->offset;
4309 sub_end = entry->vme_end - entry->vme_start;
4310 sub_end += entry->offset;
4311 local_end = entry->vme_end;
4312 if(map_pmap == NULL) {
4313 if(entry->use_pmap) {
4314 pmap = entry->object.sub_map->pmap;
4315 pmap_addr = sub_start;
4316 } else {
4317 pmap = map->pmap;
4318 pmap_addr = start;
4319 }
4320 if (entry->wired_count == 0 ||
4321 (user_wire && entry->user_wired_count == 0)) {
4322 if (!user_wire)
4323 panic("vm_map_unwire: entry is unwired");
4324 entry = entry->vme_next;
4325 continue;
4326 }
4327
4328 /*
4329 * Check for holes
4330 * Holes: Next entry should be contiguous unless
4331 * this is the end of the region.
4332 */
4333 if (((entry->vme_end < end) &&
4334 ((entry->vme_next == vm_map_to_entry(map)) ||
4335 (entry->vme_next->vme_start
4336 > entry->vme_end)))) {
4337 if (!user_wire)
4338 panic("vm_map_unwire: non-contiguous region");
4339 /*
4340 entry = entry->vme_next;
4341 continue;
4342 */
4343 }
4344
4345 subtract_wire_counts(map, entry, user_wire);
4346
4347 if (entry->wired_count != 0) {
4348 entry = entry->vme_next;
4349 continue;
4350 }
4351
4352 entry->in_transition = TRUE;
4353 tmp_entry = *entry;/* see comment in vm_map_wire() */
4354
4355 /*
4356 * We can unlock the map now. The in_transition state
4357 * guarantees the existence of the entry.
4358 */
4359 vm_map_unlock(map);
4360 vm_map_unwire_nested(entry->object.sub_map,
4361 sub_start, sub_end, user_wire, pmap, pmap_addr);
4362 vm_map_lock(map);
4363
4364 if (last_timestamp+1 != map->timestamp) {
4365 /*
4366 * Find the entry again. It could have been
4367 * clipped or deleted after we unlocked the map.
4368 */
4369 if (!vm_map_lookup_entry(map,
4370 tmp_entry.vme_start,
4371 &first_entry)) {
4372 if (!user_wire)
4373 panic("vm_map_unwire: re-lookup failed");
4374 entry = first_entry->vme_next;
4375 } else
4376 entry = first_entry;
4377 }
4378 last_timestamp = map->timestamp;
4379
4380 /*
4381 * clear transition bit for all constituent entries
4382 * that were in the original entry (saved in
4383 * tmp_entry). Also check for waiters.
4384 */
4385 while ((entry != vm_map_to_entry(map)) &&
4386 (entry->vme_start < tmp_entry.vme_end)) {
4387 assert(entry->in_transition);
4388 entry->in_transition = FALSE;
4389 if (entry->needs_wakeup) {
4390 entry->needs_wakeup = FALSE;
4391 need_wakeup = TRUE;
4392 }
4393 entry = entry->vme_next;
4394 }
4395 continue;
4396 } else {
4397 vm_map_unlock(map);
4398 vm_map_unwire_nested(entry->object.sub_map,
4399 sub_start, sub_end, user_wire, map_pmap,
4400 pmap_addr);
4401 vm_map_lock(map);
4402
4403 if (last_timestamp+1 != map->timestamp) {
4404 /*
4405 * Find the entry again. It could have been
4406 * clipped or deleted after we unlocked the map.
4407 */
4408 if (!vm_map_lookup_entry(map,
4409 tmp_entry.vme_start,
4410 &first_entry)) {
4411 if (!user_wire)
4412 panic("vm_map_unwire: re-lookup failed");
4413 entry = first_entry->vme_next;
4414 } else
4415 entry = first_entry;
4416 }
4417 last_timestamp = map->timestamp;
4418 }
4419 }
4420
4421
4422 if ((entry->wired_count == 0) ||
4423 (user_wire && entry->user_wired_count == 0)) {
4424 if (!user_wire)
4425 panic("vm_map_unwire: entry is unwired");
4426
4427 entry = entry->vme_next;
4428 continue;
4429 }
4430
4431 assert(entry->wired_count > 0 &&
4432 (!user_wire || entry->user_wired_count > 0));
4433
4434 vm_map_clip_start(map, entry, start);
4435 vm_map_clip_end(map, entry, end);
4436
4437 /*
4438 * Check for holes
4439 * Holes: Next entry should be contiguous unless
4440 * this is the end of the region.
4441 */
4442 if (((entry->vme_end < end) &&
4443 ((entry->vme_next == vm_map_to_entry(map)) ||
4444 (entry->vme_next->vme_start > entry->vme_end)))) {
4445
4446 if (!user_wire)
4447 panic("vm_map_unwire: non-contiguous region");
4448 entry = entry->vme_next;
4449 continue;
4450 }
4451
4452 subtract_wire_counts(map, entry, user_wire);
4453
4454 if (entry->wired_count != 0) {
4455 entry = entry->vme_next;
4456 continue;
4457 }
4458
4459 if(entry->zero_wired_pages) {
4460 entry->zero_wired_pages = FALSE;
4461 }
4462
4463 entry->in_transition = TRUE;
4464 tmp_entry = *entry; /* see comment in vm_map_wire() */
4465
4466 /*
4467 * We can unlock the map now. The in_transition state
4468 * guarantees the existence of the entry.
4469 */
4470 vm_map_unlock(map);
4471 if(map_pmap) {
4472 vm_fault_unwire(map,
4473 &tmp_entry, FALSE, map_pmap, pmap_addr);
4474 } else {
4475 vm_fault_unwire(map,
4476 &tmp_entry, FALSE, map->pmap,
4477 tmp_entry.vme_start);
4478 }
4479 vm_map_lock(map);
4480
4481 if (last_timestamp+1 != map->timestamp) {
4482 /*
4483 * Find the entry again. It could have been clipped
4484 * or deleted after we unlocked the map.
4485 */
4486 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4487 &first_entry)) {
4488 if (!user_wire)
4489 panic("vm_map_unwire: re-lookup failed");
4490 entry = first_entry->vme_next;
4491 } else
4492 entry = first_entry;
4493 }
4494 last_timestamp = map->timestamp;
4495
4496 /*
4497 * clear transition bit for all constituent entries that
4498 * were in the original entry (saved in tmp_entry). Also
4499 * check for waiters.
4500 */
4501 while ((entry != vm_map_to_entry(map)) &&
4502 (entry->vme_start < tmp_entry.vme_end)) {
4503 assert(entry->in_transition);
4504 entry->in_transition = FALSE;
4505 if (entry->needs_wakeup) {
4506 entry->needs_wakeup = FALSE;
4507 need_wakeup = TRUE;
4508 }
4509 entry = entry->vme_next;
4510 }
4511 }
4512
4513 /*
4514 * We might have fragmented the address space when we wired this
4515 * range of addresses. Attempt to re-coalesce these VM map entries
4516 * with their neighbors now that they're no longer wired.
4517 * Under some circumstances, address space fragmentation can
4518 * prevent VM object shadow chain collapsing, which can cause
4519 * swap space leaks.
4520 */
4521 vm_map_simplify_range(map, start, end);
4522
4523 vm_map_unlock(map);
4524 /*
4525 * wake up anybody waiting on entries that we have unwired.
4526 */
4527 if (need_wakeup)
4528 vm_map_entry_wakeup(map);
4529 return(KERN_SUCCESS);
4530
4531 }
4532
4533 kern_return_t
4534 vm_map_unwire(
4535 register vm_map_t map,
4536 register vm_map_offset_t start,
4537 register vm_map_offset_t end,
4538 boolean_t user_wire)
4539 {
4540 return vm_map_unwire_nested(map, start, end,
4541 user_wire, (pmap_t)NULL, 0);
4542 }
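/*
 * Illustrative sketch only (kept out of the build with #if 0): the
 * user-wire flavor of the calls above, as a vslock()-style helper might
 * use them.  With user_wire == TRUE the waits in vm_map_wire_nested()
 * are THREAD_ABORTSAFE, so the caller must expect an error return; per
 * the error path in vm_map_wire_nested(), any partially wired prefix has
 * already been undone.  The helper name is hypothetical.
 */
#if 0
static kern_return_t
example_user_wire(
	vm_map_t		map,
	vm_map_offset_t		start,
	vm_map_offset_t		end)
{
	kern_return_t		kr;

	kr = vm_map_wire(map, start, end, VM_PROT_READ, TRUE);
	if (kr != KERN_SUCCESS) {
		/* interrupted or bad range: nothing is left wired by us */
		return kr;
	}

	/* ... access the wired range ... */

	return vm_map_unwire(map, start, end, TRUE);
}
#endif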
4543
4544
4545 /*
4546 * vm_map_entry_delete: [ internal use only ]
4547 *
4548 * Deallocate the given entry from the target map.
4549 */
4550 static void
4551 vm_map_entry_delete(
4552 register vm_map_t map,
4553 register vm_map_entry_t entry)
4554 {
4555 register vm_map_offset_t s, e;
4556 register vm_object_t object;
4557 register vm_map_t submap;
4558
4559 s = entry->vme_start;
4560 e = entry->vme_end;
4561 assert(page_aligned(s));
4562 assert(page_aligned(e));
4563 assert(entry->wired_count == 0);
4564 assert(entry->user_wired_count == 0);
4565 assert(!entry->permanent);
4566
4567 if (entry->is_sub_map) {
4568 object = NULL;
4569 submap = entry->object.sub_map;
4570 } else {
4571 submap = NULL;
4572 object = entry->object.vm_object;
4573 }
4574
4575 vm_map_store_entry_unlink(map, entry);
4576 map->size -= e - s;
4577
4578 vm_map_entry_dispose(map, entry);
4579
4580 vm_map_unlock(map);
4581 /*
4582 * Deallocate the object only after removing all
4583 * pmap entries pointing to its pages.
4584 */
4585 if (submap)
4586 vm_map_deallocate(submap);
4587 else
4588 vm_object_deallocate(object);
4589
4590 }
4591
4592 void
4593 vm_map_submap_pmap_clean(
4594 vm_map_t map,
4595 vm_map_offset_t start,
4596 vm_map_offset_t end,
4597 vm_map_t sub_map,
4598 vm_map_offset_t offset)
4599 {
4600 vm_map_offset_t submap_start;
4601 vm_map_offset_t submap_end;
4602 vm_map_size_t remove_size;
4603 vm_map_entry_t entry;
4604
4605 submap_end = offset + (end - start);
4606 submap_start = offset;
4607
4608 vm_map_lock_read(sub_map);
4609 if(vm_map_lookup_entry(sub_map, offset, &entry)) {
4610
4611 remove_size = (entry->vme_end - entry->vme_start);
4612 if(offset > entry->vme_start)
4613 remove_size -= offset - entry->vme_start;
4614
4615
4616 if(submap_end < entry->vme_end) {
4617 remove_size -=
4618 entry->vme_end - submap_end;
4619 }
4620 if(entry->is_sub_map) {
4621 vm_map_submap_pmap_clean(
4622 sub_map,
4623 start,
4624 start + remove_size,
4625 entry->object.sub_map,
4626 entry->offset);
4627 } else {
4628
4629 if((map->mapped) && (map->ref_count)
4630 && (entry->object.vm_object != NULL)) {
4631 vm_object_pmap_protect(
4632 entry->object.vm_object,
4633 entry->offset+(offset-entry->vme_start),
4634 remove_size,
4635 PMAP_NULL,
4636 entry->vme_start,
4637 VM_PROT_NONE);
4638 } else {
4639 pmap_remove(map->pmap,
4640 (addr64_t)start,
4641 (addr64_t)(start + remove_size));
4642 }
4643 }
4644 }
4645
4646 entry = entry->vme_next;
4647
4648 while((entry != vm_map_to_entry(sub_map))
4649 && (entry->vme_start < submap_end)) {
4650 remove_size = (entry->vme_end - entry->vme_start);
4651 if(submap_end < entry->vme_end) {
4652 remove_size -= entry->vme_end - submap_end;
4653 }
4654 if(entry->is_sub_map) {
4655 vm_map_submap_pmap_clean(
4656 sub_map,
4657 (start + entry->vme_start) - offset,
4658 ((start + entry->vme_start) - offset) + remove_size,
4659 entry->object.sub_map,
4660 entry->offset);
4661 } else {
4662 if((map->mapped) && (map->ref_count)
4663 && (entry->object.vm_object != NULL)) {
4664 vm_object_pmap_protect(
4665 entry->object.vm_object,
4666 entry->offset,
4667 remove_size,
4668 PMAP_NULL,
4669 entry->vme_start,
4670 VM_PROT_NONE);
4671 } else {
4672 pmap_remove(map->pmap,
4673 (addr64_t)((start + entry->vme_start)
4674 - offset),
4675 (addr64_t)(((start + entry->vme_start)
4676 - offset) + remove_size));
4677 }
4678 }
4679 entry = entry->vme_next;
4680 }
4681 vm_map_unlock_read(sub_map);
4682 return;
4683 }
4684
4685 /*
4686 * vm_map_delete: [ internal use only ]
4687 *
4688 * Deallocates the given address range from the target map.
4689 * Removes all user wirings. Unwires one kernel wiring if
4690 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
4691 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
4692 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
4693 *
4694 * This routine is called with map locked and leaves map locked.
4695 */
4696 static kern_return_t
4697 vm_map_delete(
4698 vm_map_t map,
4699 vm_map_offset_t start,
4700 vm_map_offset_t end,
4701 int flags,
4702 vm_map_t zap_map)
4703 {
4704 vm_map_entry_t entry, next;
4705 struct vm_map_entry *first_entry, tmp_entry;
4706 register vm_map_offset_t s;
4707 register vm_object_t object;
4708 boolean_t need_wakeup;
4709 unsigned int last_timestamp = ~0; /* unlikely value */
4710 int interruptible;
4711
4712 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
4713 THREAD_ABORTSAFE : THREAD_UNINT;
4714
4715 /*
4716 * All our DMA I/O operations in IOKit are currently done by
4717 * wiring through the map entries of the task requesting the I/O.
4718 * Because of this, we must always wait for kernel wirings
4719 * to go away on the entries before deleting them.
4720 *
4721 * Any caller who wants to actually remove a kernel wiring
4722 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
4723 * properly remove one wiring instead of blasting through
4724 * them all.
4725 */
4726 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
4727
4728 while(1) {
4729 /*
4730 * Find the start of the region, and clip it
4731 */
4732 if (vm_map_lookup_entry(map, start, &first_entry)) {
4733 entry = first_entry;
4734 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */
4735 start = SUPERPAGE_ROUND_DOWN(start);
4736 continue;
4737 }
4738 if (start == entry->vme_start) {
4739 /*
4740 * No need to clip. We don't want to cause
4741 * any unnecessary unnesting in this case...
4742 */
4743 } else {
4744 vm_map_clip_start(map, entry, start);
4745 }
4746
4747 /*
4748 * Fix the lookup hint now, rather than each
4749 * time through the loop.
4750 */
4751 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4752 } else {
4753 entry = first_entry->vme_next;
4754 }
4755 break;
4756 }
4757 if (entry->superpage_size)
4758 end = SUPERPAGE_ROUND_UP(end);
4759
4760 need_wakeup = FALSE;
4761 /*
4762 * Step through all entries in this region
4763 */
4764 s = entry->vme_start;
4765 while ((entry != vm_map_to_entry(map)) && (s < end)) {
4766 /*
4767 * At this point, we have deleted all the memory entries
4768 * between "start" and "s". We still need to delete
4769 * all memory entries between "s" and "end".
4770 * While we were blocked and the map was unlocked, some
4771 * new memory entries could have been re-allocated between
4772 * "start" and "s" and we don't want to mess with those.
4773 * Some of those entries could even have been re-assembled
4774 * with an entry after "s" (in vm_map_simplify_entry()), so
4775 * we may have to vm_map_clip_start() again.
4776 */
4777
4778 if (entry->vme_start >= s) {
4779 /*
4780 * This entry starts on or after "s"
4781 * so no need to clip its start.
4782 */
4783 } else {
4784 /*
4785 * This entry has been re-assembled by a
4786 * vm_map_simplify_entry(). We need to
4787 * re-clip its start.
4788 */
4789 vm_map_clip_start(map, entry, s);
4790 }
4791 if (entry->vme_end <= end) {
4792 /*
4793 * This entry is going away completely, so no need
4794 * to clip and possibly cause an unnecessary unnesting.
4795 */
4796 } else {
4797 vm_map_clip_end(map, entry, end);
4798 }
4799
4800 if (entry->permanent) {
4801 panic("attempt to remove permanent VM map entry "
4802 "%p [0x%llx:0x%llx]\n",
4803 entry, (uint64_t) s, (uint64_t) end);
4804 }
4805
4806
4807 if (entry->in_transition) {
4808 wait_result_t wait_result;
4809
4810 /*
4811 * Another thread is wiring/unwiring this entry.
4812 * Let the other thread know we are waiting.
4813 */
4814 assert(s == entry->vme_start);
4815 entry->needs_wakeup = TRUE;
4816
4817 /*
4818 * wake up anybody waiting on entries that we have
4819 * already unwired/deleted.
4820 */
4821 if (need_wakeup) {
4822 vm_map_entry_wakeup(map);
4823 need_wakeup = FALSE;
4824 }
4825
4826 wait_result = vm_map_entry_wait(map, interruptible);
4827
4828 if (interruptible &&
4829 wait_result == THREAD_INTERRUPTED) {
4830 /*
4831 * We do not clear the needs_wakeup flag,
4832 * since we cannot tell if we were the only one.
4833 */
4834 vm_map_unlock(map);
4835 return KERN_ABORTED;
4836 }
4837
4838 /*
4839 * The entry could have been clipped or it
4840 * may not exist anymore. Look it up again.
4841 */
4842 if (!vm_map_lookup_entry(map, s, &first_entry)) {
4843 assert((map != kernel_map) &&
4844 (!entry->is_sub_map));
4845 /*
4846 * User: use the next entry
4847 */
4848 entry = first_entry->vme_next;
4849 s = entry->vme_start;
4850 } else {
4851 entry = first_entry;
4852 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4853 }
4854 last_timestamp = map->timestamp;
4855 continue;
4856 } /* end in_transition */
4857
4858 if (entry->wired_count) {
4859 boolean_t user_wire;
4860
4861 user_wire = entry->user_wired_count > 0;
4862
4863 /*
4864 * Remove a kernel wiring if requested
4865 */
4866 if (flags & VM_MAP_REMOVE_KUNWIRE) {
4867 entry->wired_count--;
4868 }
4869
4870 /*
4871 * Remove all user wirings for proper accounting
4872 */
4873 if (entry->user_wired_count > 0) {
4874 while (entry->user_wired_count)
4875 subtract_wire_counts(map, entry, user_wire);
4876 }
4877
4878 if (entry->wired_count != 0) {
4879 assert(map != kernel_map);
4880 /*
4881 * Cannot continue. The typical case is when
4882 * a user thread has physical I/O pending
4883 * on this page. Either wait for the
4884 * kernel wiring to go away or return an
4885 * error.
4886 */
4887 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
4888 wait_result_t wait_result;
4889
4890 assert(s == entry->vme_start);
4891 entry->needs_wakeup = TRUE;
4892 wait_result = vm_map_entry_wait(map,
4893 interruptible);
4894
4895 if (interruptible &&
4896 wait_result == THREAD_INTERRUPTED) {
4897 /*
4898 * We do not clear the
4899 * needs_wakeup flag, since we
4900 * cannot tell if we were the
4901 * only one.
4902 */
4903 vm_map_unlock(map);
4904 return KERN_ABORTED;
4905 }
4906
4907 /*
4908 * The entry could have been clipped or
4909 * it may not exist anymore. Look it
4910 * up again.
4911 */
4912 if (!vm_map_lookup_entry(map, s,
4913 &first_entry)) {
4914 assert(map != kernel_map);
4915 /*
4916 * User: use the next entry
4917 */
4918 entry = first_entry->vme_next;
4919 s = entry->vme_start;
4920 } else {
4921 entry = first_entry;
4922 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4923 }
4924 last_timestamp = map->timestamp;
4925 continue;
4926 }
4927 else {
4928 return KERN_FAILURE;
4929 }
4930 }
4931
4932 entry->in_transition = TRUE;
4933 /*
4934 * copy current entry. see comment in vm_map_wire()
4935 */
4936 tmp_entry = *entry;
4937 assert(s == entry->vme_start);
4938
4939 /*
4940 * We can unlock the map now. The in_transition
4941 * state guarantees the existence of the entry.
4942 */
4943 vm_map_unlock(map);
4944
4945 if (tmp_entry.is_sub_map) {
4946 vm_map_t sub_map;
4947 vm_map_offset_t sub_start, sub_end;
4948 pmap_t pmap;
4949 vm_map_offset_t pmap_addr;
4950
4951
4952 sub_map = tmp_entry.object.sub_map;
4953 sub_start = tmp_entry.offset;
4954 sub_end = sub_start + (tmp_entry.vme_end -
4955 tmp_entry.vme_start);
4956 if (tmp_entry.use_pmap) {
4957 pmap = sub_map->pmap;
4958 pmap_addr = tmp_entry.vme_start;
4959 } else {
4960 pmap = map->pmap;
4961 pmap_addr = tmp_entry.vme_start;
4962 }
4963 (void) vm_map_unwire_nested(sub_map,
4964 sub_start, sub_end,
4965 user_wire,
4966 pmap, pmap_addr);
4967 } else {
4968
4969 vm_fault_unwire(map, &tmp_entry,
4970 tmp_entry.object.vm_object == kernel_object,
4971 map->pmap, tmp_entry.vme_start);
4972 }
4973
4974 vm_map_lock(map);
4975
4976 if (last_timestamp+1 != map->timestamp) {
4977 /*
4978 * Find the entry again. It could have
4979 * been clipped after we unlocked the map.
4980 */
4981 if (!vm_map_lookup_entry(map, s, &first_entry)){
4982 assert((map != kernel_map) &&
4983 (!entry->is_sub_map));
4984 first_entry = first_entry->vme_next;
4985 s = first_entry->vme_start;
4986 } else {
4987 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4988 }
4989 } else {
4990 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4991 first_entry = entry;
4992 }
4993
4994 last_timestamp = map->timestamp;
4995
4996 entry = first_entry;
4997 while ((entry != vm_map_to_entry(map)) &&
4998 (entry->vme_start < tmp_entry.vme_end)) {
4999 assert(entry->in_transition);
5000 entry->in_transition = FALSE;
5001 if (entry->needs_wakeup) {
5002 entry->needs_wakeup = FALSE;
5003 need_wakeup = TRUE;
5004 }
5005 entry = entry->vme_next;
5006 }
5007 /*
5008 * We have unwired the entry(s). Go back and
5009 * delete them.
5010 */
5011 entry = first_entry;
5012 continue;
5013 }
5014
5015 /* entry is unwired */
5016 assert(entry->wired_count == 0);
5017 assert(entry->user_wired_count == 0);
5018
5019 assert(s == entry->vme_start);
5020
5021 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
5022 /*
5023 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
5024 * vm_map_delete(), some map entries might have been
5025 * transferred to a "zap_map", which doesn't have a
5026 * pmap. The original pmap has already been flushed
5027 * in the vm_map_delete() call targeting the original
5028 * map, but when we get to destroying the "zap_map",
5029 * we don't have any pmap to flush, so let's just skip
5030 * all this.
5031 */
5032 } else if (entry->is_sub_map) {
5033 if (entry->use_pmap) {
5034 #ifndef NO_NESTED_PMAP
5035 pmap_unnest(map->pmap,
5036 (addr64_t)entry->vme_start,
5037 entry->vme_end - entry->vme_start);
5038 #endif /* NO_NESTED_PMAP */
5039 if ((map->mapped) && (map->ref_count)) {
5040 /* clean up parent map/maps */
5041 vm_map_submap_pmap_clean(
5042 map, entry->vme_start,
5043 entry->vme_end,
5044 entry->object.sub_map,
5045 entry->offset);
5046 }
5047 } else {
5048 vm_map_submap_pmap_clean(
5049 map, entry->vme_start, entry->vme_end,
5050 entry->object.sub_map,
5051 entry->offset);
5052 }
5053 } else if (entry->object.vm_object != kernel_object) {
5054 object = entry->object.vm_object;
5055 if((map->mapped) && (map->ref_count)) {
5056 vm_object_pmap_protect(
5057 object, entry->offset,
5058 entry->vme_end - entry->vme_start,
5059 PMAP_NULL,
5060 entry->vme_start,
5061 VM_PROT_NONE);
5062 } else {
5063 pmap_remove(map->pmap,
5064 (addr64_t)entry->vme_start,
5065 (addr64_t)entry->vme_end);
5066 }
5067 }
5068
5069 /*
5070 * All pmap mappings for this map entry must have been
5071 * cleared by now.
5072 */
5073 assert(vm_map_pmap_is_empty(map,
5074 entry->vme_start,
5075 entry->vme_end));
5076
5077 next = entry->vme_next;
5078 s = next->vme_start;
5079 last_timestamp = map->timestamp;
5080
5081 if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
5082 zap_map != VM_MAP_NULL) {
5083 vm_map_size_t entry_size;
5084 /*
5085 * The caller wants to save the affected VM map entries
5086 * into the "zap_map". The caller will take care of
5087 * these entries.
5088 */
5089 /* unlink the entry from "map" ... */
5090 vm_map_store_entry_unlink(map, entry);
5091 /* ... and add it to the end of the "zap_map" */
5092 vm_map_store_entry_link(zap_map,
5093 vm_map_last_entry(zap_map),
5094 entry);
5095 entry_size = entry->vme_end - entry->vme_start;
5096 map->size -= entry_size;
5097 zap_map->size += entry_size;
5098 /* we didn't unlock the map, so no timestamp increase */
5099 last_timestamp--;
5100 } else {
5101 vm_map_entry_delete(map, entry);
5102 /* vm_map_entry_delete unlocks the map */
5103 vm_map_lock(map);
5104 }
5105
5106 entry = next;
5107
5108 if(entry == vm_map_to_entry(map)) {
5109 break;
5110 }
5111 if (last_timestamp+1 != map->timestamp) {
5112 /*
5113 * We are responsible for deleting everything
5114 * in the given range. If someone has interfered,
5115 * we pick up where we left off. Back-fills should
5116 * be all right for anyone except map_delete, and
5117 * we have to assume that the task has been fully
5118 * disabled before we get here.
5119 */
5120 if (!vm_map_lookup_entry(map, s, &entry)){
5121 entry = entry->vme_next;
5122 s = entry->vme_start;
5123 } else {
5124 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5125 }
5126 /*
5127 * Others can not only allocate behind us; we can
5128 * also see entries coalesce while we don't hold the map lock.
5129 */
5130 if(entry == vm_map_to_entry(map)) {
5131 break;
5132 }
5133 }
5134 last_timestamp = map->timestamp;
5135 }
5136
5137 if (map->wait_for_space)
5138 thread_wakeup((event_t) map);
5139 /*
5140 * wake up anybody waiting on entries that we have already deleted.
5141 */
5142 if (need_wakeup)
5143 vm_map_entry_wakeup(map);
5144
5145 return KERN_SUCCESS;
5146 }
5147
5148 /*
5149 * vm_map_remove:
5150 *
5151 * Remove the given address range from the target map.
5152 * This is the exported form of vm_map_delete.
5153 */
5154 kern_return_t
5155 vm_map_remove(
5156 register vm_map_t map,
5157 register vm_map_offset_t start,
5158 register vm_map_offset_t end,
5159 register boolean_t flags)
5160 {
5161 register kern_return_t result;
5162
5163 vm_map_lock(map);
5164 VM_MAP_RANGE_CHECK(map, start, end);
5165 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
5166 vm_map_unlock(map);
5167
5168 return(result);
5169 }
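/*
 * Illustrative sketch only (kept out of the build with #if 0): a caller
 * that had taken a single kernel wiring on a range tears it down through
 * vm_map_remove().  Per the vm_map_delete() comment above, user wirings
 * are always removed, but one kernel wiring is dropped only when
 * VM_MAP_REMOVE_KUNWIRE is passed.  The wrapper name is hypothetical.
 */
#if 0
static kern_return_t
example_remove_kernel_wired_range(
	vm_map_t		map,
	vm_map_offset_t		start,
	vm_map_offset_t		end)
{
	/* drop our one kernel wiring as part of the removal */
	return vm_map_remove(map, start, end, VM_MAP_REMOVE_KUNWIRE);
}
#endif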
5170
5171
5172 /*
5173 * Routine: vm_map_copy_discard
5174 *
5175 * Description:
5176 * Dispose of a map copy object (returned by
5177 * vm_map_copyin).
5178 */
5179 void
5180 vm_map_copy_discard(
5181 vm_map_copy_t copy)
5182 {
5183 if (copy == VM_MAP_COPY_NULL)
5184 return;
5185
5186 switch (copy->type) {
5187 case VM_MAP_COPY_ENTRY_LIST:
5188 while (vm_map_copy_first_entry(copy) !=
5189 vm_map_copy_to_entry(copy)) {
5190 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
5191
5192 vm_map_copy_entry_unlink(copy, entry);
5193 vm_object_deallocate(entry->object.vm_object);
5194 vm_map_copy_entry_dispose(copy, entry);
5195 }
5196 break;
5197 case VM_MAP_COPY_OBJECT:
5198 vm_object_deallocate(copy->cpy_object);
5199 break;
5200 case VM_MAP_COPY_KERNEL_BUFFER:
5201
5202 /*
5203 * The vm_map_copy_t and possibly the data buffer were
5204 * allocated by a single call to kalloc(), i.e. the
5205 * vm_map_copy_t was not allocated out of the zone.
5206 */
5207 kfree(copy, copy->cpy_kalloc_size);
5208 return;
5209 }
5210 zfree(vm_map_copy_zone, copy);
5211 }
5212
5213 /*
5214 * Routine: vm_map_copy_copy
5215 *
5216 * Description:
5217 * Move the information in a map copy object to
5218 * a new map copy object, leaving the old one
5219 * empty.
5220 *
5221 * This is used by kernel routines that need
5222 * to look at out-of-line data (in copyin form)
5223 * before deciding whether to return SUCCESS.
5224 * If the routine returns FAILURE, the original
5225 * copy object will be deallocated; therefore,
5226 * these routines must make a copy of the copy
5227 * object and leave the original empty so that
5228 * deallocation will not fail.
5229 */
5230 vm_map_copy_t
5231 vm_map_copy_copy(
5232 vm_map_copy_t copy)
5233 {
5234 vm_map_copy_t new_copy;
5235
5236 if (copy == VM_MAP_COPY_NULL)
5237 return VM_MAP_COPY_NULL;
5238
5239 /*
5240 * Allocate a new copy object, and copy the information
5241 * from the old one into it.
5242 */
5243
5244 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
5245 *new_copy = *copy;
5246
5247 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
5248 /*
5249 * The links in the entry chain must be
5250 * changed to point to the new copy object.
5251 */
5252 vm_map_copy_first_entry(copy)->vme_prev
5253 = vm_map_copy_to_entry(new_copy);
5254 vm_map_copy_last_entry(copy)->vme_next
5255 = vm_map_copy_to_entry(new_copy);
5256 }
5257
5258 /*
5259 * Change the old copy object into one that contains
5260 * nothing to be deallocated.
5261 */
5262 copy->type = VM_MAP_COPY_OBJECT;
5263 copy->cpy_object = VM_OBJECT_NULL;
5264
5265 /*
5266 * Return the new object.
5267 */
5268 return new_copy;
5269 }
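/*
 * Illustrative sketch only (kept out of the build with #if 0): the pattern
 * described above.  A routine that may still fail after receiving
 * out-of-line data works on a private copy of the copy object, so the
 * original -- now empty -- can still be deallocated safely by its caller
 * on the failure path.  The helper name is hypothetical.
 */
#if 0
static kern_return_t
example_try_consume_copy(
	vm_map_t		dst_map,
	vm_map_offset_t		dst_addr,
	vm_map_copy_t		copy)	/* caller discards this on failure */
{
	vm_map_copy_t		new_copy;
	kern_return_t		kr;

	/* take over the entries; "copy" is left empty but still valid */
	new_copy = vm_map_copy_copy(copy);

	/* consumed on success; on failure we clean up our private copy */
	kr = vm_map_copy_overwrite(dst_map, dst_addr, new_copy, FALSE);
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(new_copy);
	return kr;
}
#endif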
5270
5271 static kern_return_t
5272 vm_map_overwrite_submap_recurse(
5273 vm_map_t dst_map,
5274 vm_map_offset_t dst_addr,
5275 vm_map_size_t dst_size)
5276 {
5277 vm_map_offset_t dst_end;
5278 vm_map_entry_t tmp_entry;
5279 vm_map_entry_t entry;
5280 kern_return_t result;
5281 boolean_t encountered_sub_map = FALSE;
5282
5283
5284
5285 /*
5286 * Verify that the destination is all writeable
5287 * initially. We have to trunc the destination
5288 * address and round the copy size or we'll end up
5289 * splitting entries in strange ways.
5290 */
5291
5292 dst_end = vm_map_round_page(dst_addr + dst_size);
5293 vm_map_lock(dst_map);
5294
5295 start_pass_1:
5296 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5297 vm_map_unlock(dst_map);
5298 return(KERN_INVALID_ADDRESS);
5299 }
5300
5301 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
5302 assert(!tmp_entry->use_pmap); /* clipping did unnest if needed */
5303
5304 for (entry = tmp_entry;;) {
5305 vm_map_entry_t next;
5306
5307 next = entry->vme_next;
5308 while(entry->is_sub_map) {
5309 vm_map_offset_t sub_start;
5310 vm_map_offset_t sub_end;
5311 vm_map_offset_t local_end;
5312
5313 if (entry->in_transition) {
5314 /*
5315 * Say that we are waiting, and wait for entry.
5316 */
5317 entry->needs_wakeup = TRUE;
5318 vm_map_entry_wait(dst_map, THREAD_UNINT);
5319
5320 goto start_pass_1;
5321 }
5322
5323 encountered_sub_map = TRUE;
5324 sub_start = entry->offset;
5325
5326 if(entry->vme_end < dst_end)
5327 sub_end = entry->vme_end;
5328 else
5329 sub_end = dst_end;
5330 sub_end -= entry->vme_start;
5331 sub_end += entry->offset;
5332 local_end = entry->vme_end;
5333 vm_map_unlock(dst_map);
5334
5335 result = vm_map_overwrite_submap_recurse(
5336 entry->object.sub_map,
5337 sub_start,
5338 sub_end - sub_start);
5339
5340 if(result != KERN_SUCCESS)
5341 return result;
5342 if (dst_end <= entry->vme_end)
5343 return KERN_SUCCESS;
5344 vm_map_lock(dst_map);
5345 if(!vm_map_lookup_entry(dst_map, local_end,
5346 &tmp_entry)) {
5347 vm_map_unlock(dst_map);
5348 return(KERN_INVALID_ADDRESS);
5349 }
5350 entry = tmp_entry;
5351 next = entry->vme_next;
5352 }
5353
5354 if ( ! (entry->protection & VM_PROT_WRITE)) {
5355 vm_map_unlock(dst_map);
5356 return(KERN_PROTECTION_FAILURE);
5357 }
5358
5359 /*
5360 * If the entry is in transition, we must wait
5361 * for it to exit that state. Anything could happen
5362 * when we unlock the map, so start over.
5363 */
5364 if (entry->in_transition) {
5365
5366 /*
5367 * Say that we are waiting, and wait for entry.
5368 */
5369 entry->needs_wakeup = TRUE;
5370 vm_map_entry_wait(dst_map, THREAD_UNINT);
5371
5372 goto start_pass_1;
5373 }
5374
5375 /*
5376 * our range is contained completely within this map entry
5377 */
5378 if (dst_end <= entry->vme_end) {
5379 vm_map_unlock(dst_map);
5380 return KERN_SUCCESS;
5381 }
5382 /*
5383 * check that range specified is contiguous region
5384 */
5385 if ((next == vm_map_to_entry(dst_map)) ||
5386 (next->vme_start != entry->vme_end)) {
5387 vm_map_unlock(dst_map);
5388 return(KERN_INVALID_ADDRESS);
5389 }
5390
5391 /*
5392 * Check for permanent objects in the destination.
5393 */
5394 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5395 ((!entry->object.vm_object->internal) ||
5396 (entry->object.vm_object->true_share))) {
5397 if(encountered_sub_map) {
5398 vm_map_unlock(dst_map);
5399 return(KERN_FAILURE);
5400 }
5401 }
5402
5403
5404 entry = next;
5405 }/* for */
5406 vm_map_unlock(dst_map);
5407 return(KERN_SUCCESS);
5408 }
5409
5410 /*
5411 * Routine: vm_map_copy_overwrite
5412 *
5413 * Description:
5414 * Copy the memory described by the map copy
5415 * object (copy; returned by vm_map_copyin) onto
5416 * the specified destination region (dst_map, dst_addr).
5417 * The destination must be writeable.
5418 *
5419 * Unlike vm_map_copyout, this routine actually
5420 * writes over previously-mapped memory. If the
5421 * previous mapping was to a permanent (user-supplied)
5422 * memory object, it is preserved.
5423 *
5424 * The attributes (protection and inheritance) of the
5425 * destination region are preserved.
5426 *
5427 * If successful, consumes the copy object.
5428 * Otherwise, the caller is responsible for it.
5429 *
5430 * Implementation notes:
5431 * To overwrite aligned temporary virtual memory, it is
5432 * sufficient to remove the previous mapping and insert
5433 * the new copy. This replacement is done either on
5434 * the whole region (if no permanent virtual memory
5435 * objects are embedded in the destination region) or
5436 * in individual map entries.
5437 *
5438 * To overwrite permanent virtual memory, it is necessary
5439 * to copy each page, as the external memory management
5440 * interface currently does not provide any optimizations.
5441 *
5442 * Unaligned memory also has to be copied. It is possible
5443 * to use 'vm_trickery' to copy the aligned data. This is
5444 * not done yet, but would not be hard to implement.
5445 *
5446 * Once a page of permanent memory has been overwritten,
5447 * it is impossible to interrupt this function; otherwise,
5448 * the call would be neither atomic nor location-independent.
5449 * The kernel-state portion of a user thread must be
5450 * interruptible.
5451 *
5452 * It may be expensive to forward all requests that might
5453 * overwrite permanent memory (vm_write, vm_copy) to
5454 * uninterruptible kernel threads. This routine may be
5455 * called by interruptible threads; however, success is
5456 * not guaranteed -- if the request cannot be performed
5457 * atomically and interruptibly, an error indication is
5458 * returned.
5459 */
5460
5461 static kern_return_t
5462 vm_map_copy_overwrite_nested(
5463 vm_map_t dst_map,
5464 vm_map_address_t dst_addr,
5465 vm_map_copy_t copy,
5466 boolean_t interruptible,
5467 pmap_t pmap,
5468 boolean_t discard_on_success)
5469 {
5470 vm_map_offset_t dst_end;
5471 vm_map_entry_t tmp_entry;
5472 vm_map_entry_t entry;
5473 kern_return_t kr;
5474 boolean_t aligned = TRUE;
5475 boolean_t contains_permanent_objects = FALSE;
5476 boolean_t encountered_sub_map = FALSE;
5477 vm_map_offset_t base_addr;
5478 vm_map_size_t copy_size;
5479 vm_map_size_t total_size;
5480
5481
5482 /*
5483 * Check for null copy object.
5484 */
5485
5486 if (copy == VM_MAP_COPY_NULL)
5487 return(KERN_SUCCESS);
5488
5489 /*
5490 * Check for special kernel buffer allocated
5491 * by new_ipc_kmsg_copyin.
5492 */
5493
5494 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
5495 return(vm_map_copyout_kernel_buffer(
5496 dst_map, &dst_addr,
5497 copy, TRUE));
5498 }
5499
5500 /*
5501 * Only works for entry lists at the moment. Will
5502 * support page lists later.
5503 */
5504
5505 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
5506
5507 if (copy->size == 0) {
5508 if (discard_on_success)
5509 vm_map_copy_discard(copy);
5510 return(KERN_SUCCESS);
5511 }
5512
5513 /*
5514 * Verify that the destination is all writeable
5515 * initially. We have to trunc the destination
5516 * address and round the copy size or we'll end up
5517 * splitting entries in strange ways.
5518 */
5519
5520 if (!page_aligned(copy->size) ||
5521 !page_aligned (copy->offset) ||
5522 !page_aligned (dst_addr))
5523 {
5524 aligned = FALSE;
5525 dst_end = vm_map_round_page(dst_addr + copy->size);
5526 } else {
5527 dst_end = dst_addr + copy->size;
5528 }
5529
5530 vm_map_lock(dst_map);
5531
5532 /* LP64todo - remove this check when vm_map_commpage64()
5533 * no longer has to stuff in a map_entry for the commpage
5534 * above the map's max_offset.
5535 */
5536 if (dst_addr >= dst_map->max_offset) {
5537 vm_map_unlock(dst_map);
5538 return(KERN_INVALID_ADDRESS);
5539 }
5540
5541 start_pass_1:
5542 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5543 vm_map_unlock(dst_map);
5544 return(KERN_INVALID_ADDRESS);
5545 }
5546 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
5547 for (entry = tmp_entry;;) {
5548 vm_map_entry_t next = entry->vme_next;
5549
5550 while(entry->is_sub_map) {
5551 vm_map_offset_t sub_start;
5552 vm_map_offset_t sub_end;
5553 vm_map_offset_t local_end;
5554
5555 if (entry->in_transition) {
5556
5557 /*
5558 * Say that we are waiting, and wait for entry.
5559 */
5560 entry->needs_wakeup = TRUE;
5561 vm_map_entry_wait(dst_map, THREAD_UNINT);
5562
5563 goto start_pass_1;
5564 }
5565
5566 local_end = entry->vme_end;
5567 if (!(entry->needs_copy)) {
5568 /* if needs_copy we are a COW submap */
5569 /* in such a case we just replace so */
5570 /* there is no need for the follow- */
5571 /* ing check. */
5572 encountered_sub_map = TRUE;
5573 sub_start = entry->offset;
5574
5575 if(entry->vme_end < dst_end)
5576 sub_end = entry->vme_end;
5577 else
5578 sub_end = dst_end;
5579 sub_end -= entry->vme_start;
5580 sub_end += entry->offset;
5581 vm_map_unlock(dst_map);
5582
5583 kr = vm_map_overwrite_submap_recurse(
5584 entry->object.sub_map,
5585 sub_start,
5586 sub_end - sub_start);
5587 if(kr != KERN_SUCCESS)
5588 return kr;
5589 vm_map_lock(dst_map);
5590 }
5591
5592 if (dst_end <= entry->vme_end)
5593 goto start_overwrite;
5594 if(!vm_map_lookup_entry(dst_map, local_end,
5595 &entry)) {
5596 vm_map_unlock(dst_map);
5597 return(KERN_INVALID_ADDRESS);
5598 }
5599 next = entry->vme_next;
5600 }
5601
5602 if ( ! (entry->protection & VM_PROT_WRITE)) {
5603 vm_map_unlock(dst_map);
5604 return(KERN_PROTECTION_FAILURE);
5605 }
5606
5607 /*
5608 * If the entry is in transition, we must wait
5609 * for it to exit that state. Anything could happen
5610 * when we unlock the map, so start over.
5611 */
5612 if (entry->in_transition) {
5613
5614 /*
5615 * Say that we are waiting, and wait for entry.
5616 */
5617 entry->needs_wakeup = TRUE;
5618 vm_map_entry_wait(dst_map, THREAD_UNINT);
5619
5620 goto start_pass_1;
5621 }
5622
5623 /*
5624 * our range is contained completely within this map entry
5625 */
5626 if (dst_end <= entry->vme_end)
5627 break;
5628 /*
5629 * check that range specified is contiguous region
5630 */
5631 if ((next == vm_map_to_entry(dst_map)) ||
5632 (next->vme_start != entry->vme_end)) {
5633 vm_map_unlock(dst_map);
5634 return(KERN_INVALID_ADDRESS);
5635 }
5636
5637
5638 /*
5639 * Check for permanent objects in the destination.
5640 */
5641 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5642 ((!entry->object.vm_object->internal) ||
5643 (entry->object.vm_object->true_share))) {
5644 contains_permanent_objects = TRUE;
5645 }
5646
5647 entry = next;
5648 }/* for */
5649
5650 start_overwrite:
5651 /*
5652 * If there are permanent objects in the destination, then
5653 * the copy cannot be interrupted.
5654 */
5655
5656 if (interruptible && contains_permanent_objects) {
5657 vm_map_unlock(dst_map);
5658 return(KERN_FAILURE); /* XXX */
5659 }
5660
5661 /*
5662 *
5663 * Make a second pass, overwriting the data
5664 * At the beginning of each loop iteration,
5665 * the next entry to be overwritten is "tmp_entry"
5666 * (initially, the value returned from the lookup above),
5667 * and the starting address expected in that entry
5668 * is "start".
5669 */
5670
5671 total_size = copy->size;
5672 if(encountered_sub_map) {
5673 copy_size = 0;
5674 /* re-calculate tmp_entry since we've had the map */
5675 /* unlocked */
5676 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
5677 vm_map_unlock(dst_map);
5678 return(KERN_INVALID_ADDRESS);
5679 }
5680 } else {
5681 copy_size = copy->size;
5682 }
5683
5684 base_addr = dst_addr;
5685 while(TRUE) {
5686 /* deconstruct the copy object and do in parts */
5687 /* only in the sub_map, interruptible case */
5688 vm_map_entry_t copy_entry;
5689 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
5690 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
5691 int nentries;
5692 int remaining_entries = 0;
5693 vm_map_offset_t new_offset = 0;
5694
5695 for (entry = tmp_entry; copy_size == 0;) {
5696 vm_map_entry_t next;
5697
5698 next = entry->vme_next;
5699
5700 /* tmp_entry and the base address are moved along */
5701 /* each time we encounter a sub-map. Otherwise */
5702 /* entry can outpace tmp_entry, and copy_size */
5703 /* may reflect the distance between them. */
5704 /* If the current entry is found to be in transition, */
5705 /* we will start over at the beginning or at the last */
5706 /* encounter of a submap, as dictated by base_addr, */
5707 /* and we will zero copy_size accordingly. */
5708 if (entry->in_transition) {
5709 /*
5710 * Say that we are waiting, and wait for entry.
5711 */
5712 entry->needs_wakeup = TRUE;
5713 vm_map_entry_wait(dst_map, THREAD_UNINT);
5714
5715 if(!vm_map_lookup_entry(dst_map, base_addr,
5716 &tmp_entry)) {
5717 vm_map_unlock(dst_map);
5718 return(KERN_INVALID_ADDRESS);
5719 }
5720 copy_size = 0;
5721 entry = tmp_entry;
5722 continue;
5723 }
5724 if(entry->is_sub_map) {
5725 vm_map_offset_t sub_start;
5726 vm_map_offset_t sub_end;
5727 vm_map_offset_t local_end;
5728
5729 if (entry->needs_copy) {
5730 /* if this is a COW submap */
5731 /* just back the range with an */
5732 /* anonymous entry */
5733 if(entry->vme_end < dst_end)
5734 sub_end = entry->vme_end;
5735 else
5736 sub_end = dst_end;
5737 if(entry->vme_start < base_addr)
5738 sub_start = base_addr;
5739 else
5740 sub_start = entry->vme_start;
5741 vm_map_clip_end(
5742 dst_map, entry, sub_end);
5743 vm_map_clip_start(
5744 dst_map, entry, sub_start);
5745 assert(!entry->use_pmap);
5746 entry->is_sub_map = FALSE;
5747 vm_map_deallocate(
5748 entry->object.sub_map);
5749 entry->object.sub_map = NULL;
5750 entry->is_shared = FALSE;
5751 entry->needs_copy = FALSE;
5752 entry->offset = 0;
5753 /*
5754 * XXX FBDP
5755 * We should propagate the protections
5756 * of the submap entry here instead
5757 * of forcing them to VM_PROT_ALL...
5758 * Or better yet, we should inherit
5759 * the protection of the copy_entry.
5760 */
5761 entry->protection = VM_PROT_ALL;
5762 entry->max_protection = VM_PROT_ALL;
5763 entry->wired_count = 0;
5764 entry->user_wired_count = 0;
5765 if(entry->inheritance
5766 == VM_INHERIT_SHARE)
5767 entry->inheritance = VM_INHERIT_COPY;
5768 continue;
5769 }
5770 /* first take care of any non-sub_map */
5771 /* entries to send */
5772 if(base_addr < entry->vme_start) {
5773 /* stuff to send */
5774 copy_size =
5775 entry->vme_start - base_addr;
5776 break;
5777 }
5778 sub_start = entry->offset;
5779
5780 if(entry->vme_end < dst_end)
5781 sub_end = entry->vme_end;
5782 else
5783 sub_end = dst_end;
5784 sub_end -= entry->vme_start;
5785 sub_end += entry->offset;
5786 local_end = entry->vme_end;
5787 vm_map_unlock(dst_map);
5788 copy_size = sub_end - sub_start;
5789
5790 /* adjust the copy object */
5791 if (total_size > copy_size) {
5792 vm_map_size_t local_size = 0;
5793 vm_map_size_t entry_size;
5794
5795 nentries = 1;
5796 new_offset = copy->offset;
5797 copy_entry = vm_map_copy_first_entry(copy);
5798 while(copy_entry !=
5799 vm_map_copy_to_entry(copy)){
5800 entry_size = copy_entry->vme_end -
5801 copy_entry->vme_start;
5802 if((local_size < copy_size) &&
5803 ((local_size + entry_size)
5804 >= copy_size)) {
5805 vm_map_copy_clip_end(copy,
5806 copy_entry,
5807 copy_entry->vme_start +
5808 (copy_size - local_size));
5809 entry_size = copy_entry->vme_end -
5810 copy_entry->vme_start;
5811 local_size += entry_size;
5812 new_offset += entry_size;
5813 }
5814 if(local_size >= copy_size) {
5815 next_copy = copy_entry->vme_next;
5816 copy_entry->vme_next =
5817 vm_map_copy_to_entry(copy);
5818 previous_prev =
5819 copy->cpy_hdr.links.prev;
5820 copy->cpy_hdr.links.prev = copy_entry;
5821 copy->size = copy_size;
5822 remaining_entries =
5823 copy->cpy_hdr.nentries;
5824 remaining_entries -= nentries;
5825 copy->cpy_hdr.nentries = nentries;
5826 break;
5827 } else {
5828 local_size += entry_size;
5829 new_offset += entry_size;
5830 nentries++;
5831 }
5832 copy_entry = copy_entry->vme_next;
5833 }
5834 }
5835
5836 if((entry->use_pmap) && (pmap == NULL)) {
5837 kr = vm_map_copy_overwrite_nested(
5838 entry->object.sub_map,
5839 sub_start,
5840 copy,
5841 interruptible,
5842 entry->object.sub_map->pmap,
5843 TRUE);
5844 } else if (pmap != NULL) {
5845 kr = vm_map_copy_overwrite_nested(
5846 entry->object.sub_map,
5847 sub_start,
5848 copy,
5849 interruptible, pmap,
5850 TRUE);
5851 } else {
5852 kr = vm_map_copy_overwrite_nested(
5853 entry->object.sub_map,
5854 sub_start,
5855 copy,
5856 interruptible,
5857 dst_map->pmap,
5858 TRUE);
5859 }
5860 if(kr != KERN_SUCCESS) {
5861 if(next_copy != NULL) {
5862 copy->cpy_hdr.nentries +=
5863 remaining_entries;
5864 copy->cpy_hdr.links.prev->vme_next =
5865 next_copy;
5866 copy->cpy_hdr.links.prev
5867 = previous_prev;
5868 copy->size = total_size;
5869 }
5870 return kr;
5871 }
5872 if (dst_end <= local_end) {
5873 return(KERN_SUCCESS);
5874 }
5875 /* otherwise copy no longer exists, it was */
5876 /* destroyed after successful copy_overwrite */
5877 copy = (vm_map_copy_t)
5878 zalloc(vm_map_copy_zone);
5879 vm_map_copy_first_entry(copy) =
5880 vm_map_copy_last_entry(copy) =
5881 vm_map_copy_to_entry(copy);
5882 copy->type = VM_MAP_COPY_ENTRY_LIST;
5883 copy->offset = new_offset;
5884
5885 /*
5886 * XXX FBDP
5887 * this does not seem to deal with
5888 * the VM map store (red-black tree)
5889 */
5890
5891 total_size -= copy_size;
5892 copy_size = 0;
5893 /* put back remainder of copy in container */
5894 if(next_copy != NULL) {
5895 copy->cpy_hdr.nentries = remaining_entries;
5896 copy->cpy_hdr.links.next = next_copy;
5897 copy->cpy_hdr.links.prev = previous_prev;
5898 copy->size = total_size;
5899 next_copy->vme_prev =
5900 vm_map_copy_to_entry(copy);
5901 next_copy = NULL;
5902 }
5903 base_addr = local_end;
5904 vm_map_lock(dst_map);
5905 if(!vm_map_lookup_entry(dst_map,
5906 local_end, &tmp_entry)) {
5907 vm_map_unlock(dst_map);
5908 return(KERN_INVALID_ADDRESS);
5909 }
5910 entry = tmp_entry;
5911 continue;
5912 }
5913 if (dst_end <= entry->vme_end) {
5914 copy_size = dst_end - base_addr;
5915 break;
5916 }
5917
5918 if ((next == vm_map_to_entry(dst_map)) ||
5919 (next->vme_start != entry->vme_end)) {
5920 vm_map_unlock(dst_map);
5921 return(KERN_INVALID_ADDRESS);
5922 }
5923
5924 entry = next;
5925 }/* for */
5926
5927 next_copy = NULL;
5928 nentries = 1;
5929
5930 /* adjust the copy object */
5931 if (total_size > copy_size) {
5932 vm_map_size_t local_size = 0;
5933 vm_map_size_t entry_size;
5934
5935 new_offset = copy->offset;
5936 copy_entry = vm_map_copy_first_entry(copy);
5937 while(copy_entry != vm_map_copy_to_entry(copy)) {
5938 entry_size = copy_entry->vme_end -
5939 copy_entry->vme_start;
5940 if((local_size < copy_size) &&
5941 ((local_size + entry_size)
5942 >= copy_size)) {
5943 vm_map_copy_clip_end(copy, copy_entry,
5944 copy_entry->vme_start +
5945 (copy_size - local_size));
5946 entry_size = copy_entry->vme_end -
5947 copy_entry->vme_start;
5948 local_size += entry_size;
5949 new_offset += entry_size;
5950 }
5951 if(local_size >= copy_size) {
5952 next_copy = copy_entry->vme_next;
5953 copy_entry->vme_next =
5954 vm_map_copy_to_entry(copy);
5955 previous_prev =
5956 copy->cpy_hdr.links.prev;
5957 copy->cpy_hdr.links.prev = copy_entry;
5958 copy->size = copy_size;
5959 remaining_entries =
5960 copy->cpy_hdr.nentries;
5961 remaining_entries -= nentries;
5962 copy->cpy_hdr.nentries = nentries;
5963 break;
5964 } else {
5965 local_size += entry_size;
5966 new_offset += entry_size;
5967 nentries++;
5968 }
5969 copy_entry = copy_entry->vme_next;
5970 }
5971 }
5972
5973 if (aligned) {
5974 pmap_t local_pmap;
5975
5976 if(pmap)
5977 local_pmap = pmap;
5978 else
5979 local_pmap = dst_map->pmap;
5980
5981 if ((kr = vm_map_copy_overwrite_aligned(
5982 dst_map, tmp_entry, copy,
5983 base_addr, local_pmap)) != KERN_SUCCESS) {
5984 if(next_copy != NULL) {
5985 copy->cpy_hdr.nentries +=
5986 remaining_entries;
5987 copy->cpy_hdr.links.prev->vme_next =
5988 next_copy;
5989 copy->cpy_hdr.links.prev =
5990 previous_prev;
5991 copy->size += copy_size;
5992 }
5993 return kr;
5994 }
5995 vm_map_unlock(dst_map);
5996 } else {
5997 /*
5998 * Performance gain:
5999 *
6000 * If the copy and dst address are misaligned but have the same
6001 * offset within the page, we can copy the misaligned parts
6002 * unaligned and copy the rest aligned. If they are
6003 * aligned but the length is unaligned, we simply need to copy
6004 * the end bit unaligned. We'll need to split the misaligned
6005 * bits of the region in this case!
6006 */
6007 /* ALWAYS UNLOCKS THE dst_map MAP */
6008 if ((kr = vm_map_copy_overwrite_unaligned( dst_map,
6009 tmp_entry, copy, base_addr)) != KERN_SUCCESS) {
6010 if(next_copy != NULL) {
6011 copy->cpy_hdr.nentries +=
6012 remaining_entries;
6013 copy->cpy_hdr.links.prev->vme_next =
6014 next_copy;
6015 copy->cpy_hdr.links.prev =
6016 previous_prev;
6017 copy->size += copy_size;
6018 }
6019 return kr;
6020 }
6021 }
6022 total_size -= copy_size;
6023 if(total_size == 0)
6024 break;
6025 base_addr += copy_size;
6026 copy_size = 0;
6027 copy->offset = new_offset;
6028 if(next_copy != NULL) {
6029 copy->cpy_hdr.nentries = remaining_entries;
6030 copy->cpy_hdr.links.next = next_copy;
6031 copy->cpy_hdr.links.prev = previous_prev;
6032 next_copy->vme_prev = vm_map_copy_to_entry(copy);
6033 copy->size = total_size;
6034 }
6035 vm_map_lock(dst_map);
6036 while(TRUE) {
6037 if (!vm_map_lookup_entry(dst_map,
6038 base_addr, &tmp_entry)) {
6039 vm_map_unlock(dst_map);
6040 return(KERN_INVALID_ADDRESS);
6041 }
6042 if (tmp_entry->in_transition) {
6043 entry->needs_wakeup = TRUE;
6044 vm_map_entry_wait(dst_map, THREAD_UNINT);
6045 } else {
6046 break;
6047 }
6048 }
6049 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(base_addr));
6050
6051 entry = tmp_entry;
6052 } /* while */
6053
6054 /*
6055 * Throw away the vm_map_copy object
6056 */
6057 if (discard_on_success)
6058 vm_map_copy_discard(copy);
6059
6060 return(KERN_SUCCESS);
6061 }/* vm_map_copy_overwrite_nested */
6062
6063 kern_return_t
6064 vm_map_copy_overwrite(
6065 vm_map_t dst_map,
6066 vm_map_offset_t dst_addr,
6067 vm_map_copy_t copy,
6068 boolean_t interruptible)
6069 {
6070 vm_map_size_t head_size, tail_size;
6071 vm_map_copy_t head_copy, tail_copy;
6072 vm_map_offset_t head_addr, tail_addr;
6073 vm_map_entry_t entry;
6074 kern_return_t kr;
6075
6076 head_size = 0;
6077 tail_size = 0;
6078 head_copy = NULL;
6079 tail_copy = NULL;
6080 head_addr = 0;
6081 tail_addr = 0;
6082
6083 if (interruptible ||
6084 copy == VM_MAP_COPY_NULL ||
6085 copy->type != VM_MAP_COPY_ENTRY_LIST) {
6086 /*
6087 * We can't split the "copy" map if we're interruptible
6088 * or if we don't have a "copy" map...
6089 */
6090 blunt_copy:
6091 return vm_map_copy_overwrite_nested(dst_map,
6092 dst_addr,
6093 copy,
6094 interruptible,
6095 (pmap_t) NULL,
6096 TRUE);
6097 }
6098
6099 if (copy->size < 3 * PAGE_SIZE) {
6100 /*
6101 * Too small to bother with optimizing...
6102 */
6103 goto blunt_copy;
6104 }
6105
6106 if ((dst_addr & PAGE_MASK) != (copy->offset & PAGE_MASK)) {
6107 /*
6108 * Incompatible mis-alignment of source and destination...
6109 */
6110 goto blunt_copy;
6111 }
6112
6113 /*
6114 * Proper alignment or identical mis-alignment at the beginning.
6115 * Let's try and do a small unaligned copy first (if needed)
6116 * and then an aligned copy for the rest.
6117 */
6118 if (!page_aligned(dst_addr)) {
6119 head_addr = dst_addr;
6120 head_size = PAGE_SIZE - (copy->offset & PAGE_MASK);
6121 }
6122 if (!page_aligned(copy->offset + copy->size)) {
6123 /*
6124 * Mis-alignment at the end.
6125 * Do an aligned copy up to the last page and
6126 * then an unaligned copy for the remaining bytes.
6127 */
6128 tail_size = (copy->offset + copy->size) & PAGE_MASK;
6129 tail_addr = dst_addr + copy->size - tail_size;
6130 }
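	/*
	 * Worked example (sketch, assuming 4KB pages): with
	 * copy->offset = 0x1e00, copy->size = 0x4300 and a dst_addr that
	 * shares the same page offset (0xe00):
	 *	head_size = 0x1000 - 0xe00            = 0x200
	 *	tail_size = (0x1e00 + 0x4300) & 0xfff = 0x100
	 * which leaves 0x4000 (four whole pages) for the aligned middle copy.
	 */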
6131
6132 if (head_size + tail_size == copy->size) {
6133 /*
6134 * It's all unaligned, no optimization possible...
6135 */
6136 goto blunt_copy;
6137 }
6138
6139 /*
6140 * Can't optimize if there are any submaps in the
6141 * destination due to the way we free the "copy" map
6142 * progressively in vm_map_copy_overwrite_nested()
6143 * in that case.
6144 */
6145 vm_map_lock_read(dst_map);
6146 if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
6147 vm_map_unlock_read(dst_map);
6148 goto blunt_copy;
6149 }
6150 for (;
6151 (entry != vm_map_copy_to_entry(copy) &&
6152 entry->vme_start < dst_addr + copy->size);
6153 entry = entry->vme_next) {
6154 if (entry->is_sub_map) {
6155 vm_map_unlock_read(dst_map);
6156 goto blunt_copy;
6157 }
6158 }
6159 vm_map_unlock_read(dst_map);
6160
6161 if (head_size) {
6162 /*
6163 * Unaligned copy of the first "head_size" bytes, to reach
6164 * a page boundary.
6165 */
6166
6167 /*
6168 * Extract "head_copy" out of "copy".
6169 */
6170 head_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6171 vm_map_copy_first_entry(head_copy) =
6172 vm_map_copy_to_entry(head_copy);
6173 vm_map_copy_last_entry(head_copy) =
6174 vm_map_copy_to_entry(head_copy);
6175 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
6176 head_copy->cpy_hdr.nentries = 0;
6177 head_copy->cpy_hdr.entries_pageable =
6178 copy->cpy_hdr.entries_pageable;
6179 vm_map_store_init(&head_copy->cpy_hdr);
6180
6181 head_copy->offset = copy->offset;
6182 head_copy->size = head_size;
6183
6184 copy->offset += head_size;
6185 copy->size -= head_size;
6186
6187 entry = vm_map_copy_first_entry(copy);
6188 vm_map_copy_clip_end(copy, entry, copy->offset);
6189 vm_map_copy_entry_unlink(copy, entry);
6190 vm_map_copy_entry_link(head_copy,
6191 vm_map_copy_to_entry(head_copy),
6192 entry);
6193
6194 /*
6195 * Do the unaligned copy.
6196 */
6197 kr = vm_map_copy_overwrite_nested(dst_map,
6198 head_addr,
6199 head_copy,
6200 interruptible,
6201 (pmap_t) NULL,
6202 FALSE);
6203 if (kr != KERN_SUCCESS)
6204 goto done;
6205 }
6206
6207 if (tail_size) {
6208 /*
6209 * Extract "tail_copy" out of "copy".
6210 */
6211 tail_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6212 vm_map_copy_first_entry(tail_copy) =
6213 vm_map_copy_to_entry(tail_copy);
6214 vm_map_copy_last_entry(tail_copy) =
6215 vm_map_copy_to_entry(tail_copy);
6216 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
6217 tail_copy->cpy_hdr.nentries = 0;
6218 tail_copy->cpy_hdr.entries_pageable =
6219 copy->cpy_hdr.entries_pageable;
6220 vm_map_store_init(&tail_copy->cpy_hdr);
6221
6222 tail_copy->offset = copy->offset + copy->size - tail_size;
6223 tail_copy->size = tail_size;
6224
6225 copy->size -= tail_size;
6226
6227 entry = vm_map_copy_last_entry(copy);
6228 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
6229 entry = vm_map_copy_last_entry(copy);
6230 vm_map_copy_entry_unlink(copy, entry);
6231 vm_map_copy_entry_link(tail_copy,
6232 vm_map_copy_last_entry(tail_copy),
6233 entry);
6234 }
6235
6236 /*
6237 * Copy most (or possibly all) of the data.
6238 */
6239 kr = vm_map_copy_overwrite_nested(dst_map,
6240 dst_addr + head_size,
6241 copy,
6242 interruptible,
6243 (pmap_t) NULL,
6244 FALSE);
6245 if (kr != KERN_SUCCESS) {
6246 goto done;
6247 }
6248
6249 if (tail_size) {
6250 kr = vm_map_copy_overwrite_nested(dst_map,
6251 tail_addr,
6252 tail_copy,
6253 interruptible,
6254 (pmap_t) NULL,
6255 FALSE);
6256 }
6257
6258 done:
6259 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
6260 if (kr == KERN_SUCCESS) {
6261 /*
6262 * Discard all the copy maps.
6263 */
6264 if (head_copy) {
6265 vm_map_copy_discard(head_copy);
6266 head_copy = NULL;
6267 }
6268 vm_map_copy_discard(copy);
6269 if (tail_copy) {
6270 vm_map_copy_discard(tail_copy);
6271 tail_copy = NULL;
6272 }
6273 } else {
6274 /*
6275 * Re-assemble the original copy map.
6276 */
6277 if (head_copy) {
6278 entry = vm_map_copy_first_entry(head_copy);
6279 vm_map_copy_entry_unlink(head_copy, entry);
6280 vm_map_copy_entry_link(copy,
6281 vm_map_copy_to_entry(copy),
6282 entry);
6283 copy->offset -= head_size;
6284 copy->size += head_size;
6285 vm_map_copy_discard(head_copy);
6286 head_copy = NULL;
6287 }
6288 if (tail_copy) {
6289 entry = vm_map_copy_last_entry(tail_copy);
6290 vm_map_copy_entry_unlink(tail_copy, entry);
6291 vm_map_copy_entry_link(copy,
6292 vm_map_copy_last_entry(copy),
6293 entry);
6294 copy->size += tail_size;
6295 vm_map_copy_discard(tail_copy);
6296 tail_copy = NULL;
6297 }
6298 }
6299 return kr;
6300 }
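/*
 * Example (sketch): a hypothetical caller overwriting an already-mapped
 * destination region with the contents of a vm_map_copy_t via the
 * exported vm_map_copy_overwrite() above.  The function name and error
 * policy below are illustrative only and do not appear elsewhere in xnu.
 */
#if 0	/* illustrative sketch, not part of this file */
static kern_return_t
example_overwrite_region(
	vm_map_t	dst_map,
	vm_map_offset_t	dst_addr,
	vm_map_copy_t	copy)
{
	kern_return_t	kr;

	/* non-interruptible overwrite; the copy is consumed on success */
	kr = vm_map_copy_overwrite(dst_map, dst_addr, copy, FALSE);
	if (kr != KERN_SUCCESS) {
		/*
		 * On failure the caller still owns "copy"; drop it
		 * here since this sketch no longer needs the data.
		 */
		vm_map_copy_discard(copy);
	}
	return kr;
}
#endif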
6301
6302
6303 /*
6304 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
6305 *
6306 * Description:
6307 * Physically copy unaligned data
6308 *
6309 * Implementation:
6310 * Unaligned parts of pages have to be physically copied. We use
6311 * a modified form of vm_fault_copy (which understands non-aligned
6312 * page offsets and sizes) to do the copy. We attempt to copy as
6313 * much memory in one go as possible; however, vm_fault_copy copies
6314 * within one memory object, so we have to find the smallest of "amount left",
6315 * "source object data size" and "target object data size". With
6316 * unaligned data we don't need to split regions, therefore the source
6317 * (copy) object should be one map entry, the target range may be split
6318 * over multiple map entries however. In any event we are pessimistic
6319 * about these assumptions.
6320 *
6321 * Assumptions:
6322 * dst_map is locked on entry and is returned locked on success,
6323 * unlocked on error.
6324 */
6325
6326 static kern_return_t
6327 vm_map_copy_overwrite_unaligned(
6328 vm_map_t dst_map,
6329 vm_map_entry_t entry,
6330 vm_map_copy_t copy,
6331 vm_map_offset_t start)
6332 {
6333 vm_map_entry_t copy_entry = vm_map_copy_first_entry(copy);
6334 vm_map_version_t version;
6335 vm_object_t dst_object;
6336 vm_object_offset_t dst_offset;
6337 vm_object_offset_t src_offset;
6338 vm_object_offset_t entry_offset;
6339 vm_map_offset_t entry_end;
6340 vm_map_size_t src_size,
6341 dst_size,
6342 copy_size,
6343 amount_left;
6344 kern_return_t kr = KERN_SUCCESS;
6345
6346 vm_map_lock_write_to_read(dst_map);
6347
6348 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
6349 amount_left = copy->size;
6350 /*
6351 * unaligned, so we never clipped this entry; we need the offset into
6352 * the vm_object, not just the data.
6353 */
6354 while (amount_left > 0) {
6355
6356 if (entry == vm_map_to_entry(dst_map)) {
6357 vm_map_unlock_read(dst_map);
6358 return KERN_INVALID_ADDRESS;
6359 }
6360
6361 /* "start" must be within the current map entry */
6362 assert ((start>=entry->vme_start) && (start<entry->vme_end));
6363
6364 dst_offset = start - entry->vme_start;
6365
6366 dst_size = entry->vme_end - start;
6367
6368 src_size = copy_entry->vme_end -
6369 (copy_entry->vme_start + src_offset);
6370
6371 if (dst_size < src_size) {
6372 /*
6373 * we can only copy dst_size bytes before
6374 * we have to get the next destination entry
6375 */
6376 copy_size = dst_size;
6377 } else {
6378 /*
6379 * we can only copy src_size bytes before
6380 * we have to get the next source copy entry
6381 */
6382 copy_size = src_size;
6383 }
6384
6385 if (copy_size > amount_left) {
6386 copy_size = amount_left;
6387 }
6388 /*
6389 * Entry needs copy: create a shadow object for the
6390 * copy-on-write region.
6391 */
6392 if (entry->needs_copy &&
6393 ((entry->protection & VM_PROT_WRITE) != 0))
6394 {
6395 if (vm_map_lock_read_to_write(dst_map)) {
6396 vm_map_lock_read(dst_map);
6397 goto RetryLookup;
6398 }
6399 vm_object_shadow(&entry->object.vm_object,
6400 &entry->offset,
6401 (vm_map_size_t)(entry->vme_end
6402 - entry->vme_start));
6403 entry->needs_copy = FALSE;
6404 vm_map_lock_write_to_read(dst_map);
6405 }
6406 dst_object = entry->object.vm_object;
6407 /*
6408 * unlike with the virtual (aligned) copy, we're going
6409 * to fault on it, therefore we need a target object.
6410 */
6411 if (dst_object == VM_OBJECT_NULL) {
6412 if (vm_map_lock_read_to_write(dst_map)) {
6413 vm_map_lock_read(dst_map);
6414 goto RetryLookup;
6415 }
6416 dst_object = vm_object_allocate((vm_map_size_t)
6417 entry->vme_end - entry->vme_start);
6418 entry->object.vm_object = dst_object;
6419 entry->offset = 0;
6420 vm_map_lock_write_to_read(dst_map);
6421 }
6422 /*
6423 * Take an object reference and unlock map. The "entry" may
6424 * disappear or change when the map is unlocked.
6425 */
6426 vm_object_reference(dst_object);
6427 version.main_timestamp = dst_map->timestamp;
6428 entry_offset = entry->offset;
6429 entry_end = entry->vme_end;
6430 vm_map_unlock_read(dst_map);
6431 /*
6432 * Copy as much as possible in one pass
6433 */
6434 kr = vm_fault_copy(
6435 copy_entry->object.vm_object,
6436 copy_entry->offset + src_offset,
6437 &copy_size,
6438 dst_object,
6439 entry_offset + dst_offset,
6440 dst_map,
6441 &version,
6442 THREAD_UNINT );
6443
6444 start += copy_size;
6445 src_offset += copy_size;
6446 amount_left -= copy_size;
6447 /*
6448 * Release the object reference
6449 */
6450 vm_object_deallocate(dst_object);
6451 /*
6452 * If a hard error occurred, return it now
6453 */
6454 if (kr != KERN_SUCCESS)
6455 return kr;
6456
6457 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
6458 || amount_left == 0)
6459 {
6460 /*
6461 * all done with this copy entry, dispose.
6462 */
6463 vm_map_copy_entry_unlink(copy, copy_entry);
6464 vm_object_deallocate(copy_entry->object.vm_object);
6465 vm_map_copy_entry_dispose(copy, copy_entry);
6466
6467 if ((copy_entry = vm_map_copy_first_entry(copy))
6468 == vm_map_copy_to_entry(copy) && amount_left) {
6469 /*
6470 * not finished copying but run out of source
6471 */
6472 return KERN_INVALID_ADDRESS;
6473 }
6474 src_offset = 0;
6475 }
6476
6477 if (amount_left == 0)
6478 return KERN_SUCCESS;
6479
6480 vm_map_lock_read(dst_map);
6481 if (version.main_timestamp == dst_map->timestamp) {
6482 if (start == entry_end) {
6483 /*
6484 * destination region is split. Use the version
6485 * information to avoid a lookup in the normal
6486 * case.
6487 */
6488 entry = entry->vme_next;
6489 /*
6490 * should be contiguous. Fail if we encounter
6491 * a hole in the destination.
6492 */
6493 if (start != entry->vme_start) {
6494 vm_map_unlock_read(dst_map);
6495 return KERN_INVALID_ADDRESS ;
6496 }
6497 }
6498 } else {
6499 /*
6500 * Map version check failed.
6501 * we must lookup the entry because somebody
6502 * might have changed the map behind our backs.
6503 */
6504 RetryLookup:
6505 if (!vm_map_lookup_entry(dst_map, start, &entry))
6506 {
6507 vm_map_unlock_read(dst_map);
6508 return KERN_INVALID_ADDRESS ;
6509 }
6510 }
6511 }/* while */
6512
6513 return KERN_SUCCESS;
6514 }/* vm_map_copy_overwrite_unaligned */
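/*
 * Example (sketch): each pass of the loop above transfers the smallest
 * of the three limits before refreshing whichever one was exhausted,
 * i.e. conceptually
 *
 *	copy_size = dst_size < src_size ? dst_size : src_size;
 *	if (copy_size > amount_left)
 *		copy_size = amount_left;
 *
 * where dst_size is the space left in the destination entry, src_size
 * the data left in the current source copy entry, and amount_left the
 * bytes remaining overall.
 */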
6515
6516 /*
6517 * Routine: vm_map_copy_overwrite_aligned [internal use only]
6518 *
6519 * Description:
6520 * Does all the vm_trickery possible for whole pages.
6521 *
6522 * Implementation:
6523 *
6524 * If there are no permanent objects in the destination,
6525 * and the source and destination map entry zones match,
6526 * and the destination map entry is not shared,
6527 * then the map entries can be deleted and replaced
6528 * with those from the copy. The following code is the
6529 * basic idea of what to do, but there are lots of annoying
6530 * little details about getting protection and inheritance
6531 * right. Should add protection, inheritance, and sharing checks
6532 * to the above pass and make sure that no wiring is involved.
6533 */
6534
6535 int vm_map_copy_overwrite_aligned_src_not_internal = 0;
6536 int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
6537 int vm_map_copy_overwrite_aligned_src_large = 0;
6538
6539 static kern_return_t
6540 vm_map_copy_overwrite_aligned(
6541 vm_map_t dst_map,
6542 vm_map_entry_t tmp_entry,
6543 vm_map_copy_t copy,
6544 vm_map_offset_t start,
6545 __unused pmap_t pmap)
6546 {
6547 vm_object_t object;
6548 vm_map_entry_t copy_entry;
6549 vm_map_size_t copy_size;
6550 vm_map_size_t size;
6551 vm_map_entry_t entry;
6552
6553 while ((copy_entry = vm_map_copy_first_entry(copy))
6554 != vm_map_copy_to_entry(copy))
6555 {
6556 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
6557
6558 entry = tmp_entry;
6559 assert(!entry->use_pmap); /* unnested when clipped earlier */
6560 if (entry == vm_map_to_entry(dst_map)) {
6561 vm_map_unlock(dst_map);
6562 return KERN_INVALID_ADDRESS;
6563 }
6564 size = (entry->vme_end - entry->vme_start);
6565 /*
6566 * Make sure that no holes popped up in the
6567 * address map, and that the protection is
6568 * still valid, in case the map was unlocked
6569 * earlier.
6570 */
6571
6572 if ((entry->vme_start != start) || ((entry->is_sub_map)
6573 && !entry->needs_copy)) {
6574 vm_map_unlock(dst_map);
6575 return(KERN_INVALID_ADDRESS);
6576 }
6577 assert(entry != vm_map_to_entry(dst_map));
6578
6579 /*
6580 * Check protection again
6581 */
6582
6583 if ( ! (entry->protection & VM_PROT_WRITE)) {
6584 vm_map_unlock(dst_map);
6585 return(KERN_PROTECTION_FAILURE);
6586 }
6587
6588 /*
6589 * Adjust to source size first
6590 */
6591
6592 if (copy_size < size) {
6593 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
6594 size = copy_size;
6595 }
6596
6597 /*
6598 * Adjust to destination size
6599 */
6600
6601 if (size < copy_size) {
6602 vm_map_copy_clip_end(copy, copy_entry,
6603 copy_entry->vme_start + size);
6604 copy_size = size;
6605 }
6606
6607 assert((entry->vme_end - entry->vme_start) == size);
6608 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
6609 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
6610
6611 /*
6612 * If the destination contains temporary unshared memory,
6613 * we can perform the copy by throwing it away and
6614 * installing the source data.
6615 */
6616
6617 object = entry->object.vm_object;
6618 if ((!entry->is_shared &&
6619 ((object == VM_OBJECT_NULL) ||
6620 (object->internal && !object->true_share))) ||
6621 entry->needs_copy) {
6622 vm_object_t old_object = entry->object.vm_object;
6623 vm_object_offset_t old_offset = entry->offset;
6624 vm_object_offset_t offset;
6625
6626 /*
6627 * Ensure that the source and destination aren't
6628 * identical
6629 */
6630 if (old_object == copy_entry->object.vm_object &&
6631 old_offset == copy_entry->offset) {
6632 vm_map_copy_entry_unlink(copy, copy_entry);
6633 vm_map_copy_entry_dispose(copy, copy_entry);
6634
6635 if (old_object != VM_OBJECT_NULL)
6636 vm_object_deallocate(old_object);
6637
6638 start = tmp_entry->vme_end;
6639 tmp_entry = tmp_entry->vme_next;
6640 continue;
6641 }
6642
6643 #if !CONFIG_EMBEDDED
6644 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
6645 #define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
6646 if (copy_entry->object.vm_object != VM_OBJECT_NULL &&
6647 copy_entry->object.vm_object->vo_size >= __TRADEOFF1_OBJ_SIZE &&
6648 copy_size <= __TRADEOFF1_COPY_SIZE) {
6649 /*
6650 * Virtual vs. Physical copy tradeoff #1.
6651 *
6652 * Copying only a few pages out of a large
6653 * object: do a physical copy instead of
6654 * a virtual copy, to avoid possibly keeping
6655 * the entire large object alive because of
6656 * those few copy-on-write pages.
6657 */
6658 vm_map_copy_overwrite_aligned_src_large++;
6659 goto slow_copy;
6660 }
6661 #endif /* !CONFIG_EMBEDDED */
6662
6663 if (entry->alias >= VM_MEMORY_MALLOC &&
6664 entry->alias <= VM_MEMORY_MALLOC_LARGE_REUSED) {
6665 vm_object_t new_object, new_shadow;
6666
6667 /*
6668 * We're about to map something over a mapping
6669 * established by malloc()...
6670 */
6671 new_object = copy_entry->object.vm_object;
6672 if (new_object != VM_OBJECT_NULL) {
6673 vm_object_lock_shared(new_object);
6674 }
6675 while (new_object != VM_OBJECT_NULL &&
6676 #if !CONFIG_EMBEDDED
6677 !new_object->true_share &&
6678 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
6679 #endif /* !CONFIG_EMBEDDED */
6680 new_object->internal) {
6681 new_shadow = new_object->shadow;
6682 if (new_shadow == VM_OBJECT_NULL) {
6683 break;
6684 }
6685 vm_object_lock_shared(new_shadow);
6686 vm_object_unlock(new_object);
6687 new_object = new_shadow;
6688 }
6689 if (new_object != VM_OBJECT_NULL) {
6690 if (!new_object->internal) {
6691 /*
6692 * The new mapping is backed
6693 * by an external object. We
6694 * don't want malloc'ed memory
6695 * to be replaced with such a
6696 * non-anonymous mapping, so
6697 * let's go off the optimized
6698 * path...
6699 */
6700 vm_map_copy_overwrite_aligned_src_not_internal++;
6701 vm_object_unlock(new_object);
6702 goto slow_copy;
6703 }
6704 #if !CONFIG_EMBEDDED
6705 if (new_object->true_share ||
6706 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
6707 /*
6708 * Same if there's a "true_share"
6709 * object in the shadow chain, or
6710 * an object with a non-default
6711 * (SYMMETRIC) copy strategy.
6712 */
6713 vm_map_copy_overwrite_aligned_src_not_symmetric++;
6714 vm_object_unlock(new_object);
6715 goto slow_copy;
6716 }
6717 #endif /* !CONFIG_EMBEDDED */
6718 vm_object_unlock(new_object);
6719 }
6720 /*
6721 * The new mapping is still backed by
6722 * anonymous (internal) memory, so it's
6723 * OK to substitute it for the original
6724 * malloc() mapping.
6725 */
6726 }
6727
6728 if (old_object != VM_OBJECT_NULL) {
6729 if(entry->is_sub_map) {
6730 if(entry->use_pmap) {
6731 #ifndef NO_NESTED_PMAP
6732 pmap_unnest(dst_map->pmap,
6733 (addr64_t)entry->vme_start,
6734 entry->vme_end - entry->vme_start);
6735 #endif /* NO_NESTED_PMAP */
6736 if(dst_map->mapped) {
6737 /* clean up parent */
6738 /* map/maps */
6739 vm_map_submap_pmap_clean(
6740 dst_map, entry->vme_start,
6741 entry->vme_end,
6742 entry->object.sub_map,
6743 entry->offset);
6744 }
6745 } else {
6746 vm_map_submap_pmap_clean(
6747 dst_map, entry->vme_start,
6748 entry->vme_end,
6749 entry->object.sub_map,
6750 entry->offset);
6751 }
6752 vm_map_deallocate(
6753 entry->object.sub_map);
6754 } else {
6755 if(dst_map->mapped) {
6756 vm_object_pmap_protect(
6757 entry->object.vm_object,
6758 entry->offset,
6759 entry->vme_end
6760 - entry->vme_start,
6761 PMAP_NULL,
6762 entry->vme_start,
6763 VM_PROT_NONE);
6764 } else {
6765 pmap_remove(dst_map->pmap,
6766 (addr64_t)(entry->vme_start),
6767 (addr64_t)(entry->vme_end));
6768 }
6769 vm_object_deallocate(old_object);
6770 }
6771 }
6772
6773 entry->is_sub_map = FALSE;
6774 entry->object = copy_entry->object;
6775 object = entry->object.vm_object;
6776 entry->needs_copy = copy_entry->needs_copy;
6777 entry->wired_count = 0;
6778 entry->user_wired_count = 0;
6779 offset = entry->offset = copy_entry->offset;
6780
6781 vm_map_copy_entry_unlink(copy, copy_entry);
6782 vm_map_copy_entry_dispose(copy, copy_entry);
6783
6784 /*
6785 * we could try to push pages into the pmap at this point, BUT
6786 * this optimization only saved on average 2 us per page if ALL
6787 * the pages in the source were currently mapped
6788 * and ALL the pages in the dest were touched; if fewer than
6789 * 2/3 of the pages were touched, this optimization actually cost more cycles.
6790 * It also puts a lot of pressure on the pmap layer with regard to mapping structures.
6791 */
6792
6793 /*
6794 * Set up for the next iteration. The map
6795 * has not been unlocked, so the next
6796 * address should be at the end of this
6797 * entry, and the next map entry should be
6798 * the one following it.
6799 */
6800
6801 start = tmp_entry->vme_end;
6802 tmp_entry = tmp_entry->vme_next;
6803 } else {
6804 vm_map_version_t version;
6805 vm_object_t dst_object;
6806 vm_object_offset_t dst_offset;
6807 kern_return_t r;
6808
6809 slow_copy:
6810 if (entry->needs_copy) {
6811 vm_object_shadow(&entry->object.vm_object,
6812 &entry->offset,
6813 (entry->vme_end -
6814 entry->vme_start));
6815 entry->needs_copy = FALSE;
6816 }
6817
6818 dst_object = entry->object.vm_object;
6819 dst_offset = entry->offset;
6820
6821 /*
6822 * Take an object reference, and record
6823 * the map version information so that the
6824 * map can be safely unlocked.
6825 */
6826
6827 if (dst_object == VM_OBJECT_NULL) {
6828 /*
6829 * We would usually have just taken the
6830 * optimized path above if the destination
6831 * object has not been allocated yet. But we
6832 * now disable that optimization if the copy
6833 * entry's object is not backed by anonymous
6834 * memory to avoid replacing malloc'ed
6835 * (i.e. re-usable) anonymous memory with a
6836 * not-so-anonymous mapping.
6837 * So we have to handle this case here and
6838 * allocate a new VM object for this map entry.
6839 */
6840 dst_object = vm_object_allocate(
6841 entry->vme_end - entry->vme_start);
6842 dst_offset = 0;
6843 entry->object.vm_object = dst_object;
6844 entry->offset = dst_offset;
6845
6846 }
6847
6848 vm_object_reference(dst_object);
6849
6850 /* account for unlock bumping up timestamp */
6851 version.main_timestamp = dst_map->timestamp + 1;
6852
6853 vm_map_unlock(dst_map);
6854
6855 /*
6856 * Copy as much as possible in one pass
6857 */
6858
6859 copy_size = size;
6860 r = vm_fault_copy(
6861 copy_entry->object.vm_object,
6862 copy_entry->offset,
6863 &copy_size,
6864 dst_object,
6865 dst_offset,
6866 dst_map,
6867 &version,
6868 THREAD_UNINT );
6869
6870 /*
6871 * Release the object reference
6872 */
6873
6874 vm_object_deallocate(dst_object);
6875
6876 /*
6877 * If a hard error occurred, return it now
6878 */
6879
6880 if (r != KERN_SUCCESS)
6881 return(r);
6882
6883 if (copy_size != 0) {
6884 /*
6885 * Dispose of the copied region
6886 */
6887
6888 vm_map_copy_clip_end(copy, copy_entry,
6889 copy_entry->vme_start + copy_size);
6890 vm_map_copy_entry_unlink(copy, copy_entry);
6891 vm_object_deallocate(copy_entry->object.vm_object);
6892 vm_map_copy_entry_dispose(copy, copy_entry);
6893 }
6894
6895 /*
6896 * Pick up in the destination map where we left off.
6897 *
6898 * Use the version information to avoid a lookup
6899 * in the normal case.
6900 */
6901
6902 start += copy_size;
6903 vm_map_lock(dst_map);
6904 if (version.main_timestamp == dst_map->timestamp &&
6905 copy_size != 0) {
6906 /* We can safely use saved tmp_entry value */
6907
6908 vm_map_clip_end(dst_map, tmp_entry, start);
6909 tmp_entry = tmp_entry->vme_next;
6910 } else {
6911 /* Must do lookup of tmp_entry */
6912
6913 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
6914 vm_map_unlock(dst_map);
6915 return(KERN_INVALID_ADDRESS);
6916 }
6917 vm_map_clip_start(dst_map, tmp_entry, start);
6918 }
6919 }
6920 }/* while */
6921
6922 return(KERN_SUCCESS);
6923 }/* vm_map_copy_overwrite_aligned */
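/*
 * Example (sketch): the fast path above replaces the destination
 * entry's backing object outright only when the destination memory is
 * temporary and unshared.  Expressed as a hypothetical predicate (this
 * helper does not exist in xnu):
 */
#if 0	/* illustrative sketch, not part of this file */
static boolean_t
example_dst_entry_replaceable(
	vm_map_entry_t	entry)
{
	vm_object_t object = entry->object.vm_object;

	/*
	 * Either there is no backing object yet, or it is anonymous
	 * (internal) and not marked "true_share"; an entry still
	 * flagged needs_copy also qualifies.
	 */
	return ((!entry->is_shared &&
		 ((object == VM_OBJECT_NULL) ||
		  (object->internal && !object->true_share))) ||
		entry->needs_copy);
}
#endif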
6924
6925 /*
6926 * Routine: vm_map_copyin_kernel_buffer [internal use only]
6927 *
6928 * Description:
6929 * Copy in data to a kernel buffer from space in the
6930 * source map. The original space may be optionally
6931 * deallocated.
6932 *
6933 * If successful, returns a new copy object.
6934 */
6935 static kern_return_t
6936 vm_map_copyin_kernel_buffer(
6937 vm_map_t src_map,
6938 vm_map_offset_t src_addr,
6939 vm_map_size_t len,
6940 boolean_t src_destroy,
6941 vm_map_copy_t *copy_result)
6942 {
6943 kern_return_t kr;
6944 vm_map_copy_t copy;
6945 vm_size_t kalloc_size;
6946
6947 if ((vm_size_t) len != len) {
6948 /* "len" is too big and doesn't fit in a "vm_size_t" */
6949 return KERN_RESOURCE_SHORTAGE;
6950 }
6951 kalloc_size = (vm_size_t) (sizeof(struct vm_map_copy) + len);
6952 assert((vm_map_size_t) kalloc_size == sizeof (struct vm_map_copy) + len);
6953
6954 copy = (vm_map_copy_t) kalloc(kalloc_size);
6955 if (copy == VM_MAP_COPY_NULL) {
6956 return KERN_RESOURCE_SHORTAGE;
6957 }
6958 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
6959 copy->size = len;
6960 copy->offset = 0;
6961 copy->cpy_kdata = (void *) (copy + 1);
6962 copy->cpy_kalloc_size = kalloc_size;
6963
6964 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t) len);
6965 if (kr != KERN_SUCCESS) {
6966 kfree(copy, kalloc_size);
6967 return kr;
6968 }
6969 if (src_destroy) {
6970 (void) vm_map_remove(src_map, vm_map_trunc_page(src_addr),
6971 vm_map_round_page(src_addr + len),
6972 VM_MAP_REMOVE_INTERRUPTIBLE |
6973 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
6974 (src_map == kernel_map) ?
6975 VM_MAP_REMOVE_KUNWIRE : 0);
6976 }
6977 *copy_result = copy;
6978 return KERN_SUCCESS;
6979 }
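/*
 * Layout sketch: the kernel-buffer copy is a single kalloc()ed block
 * with the data placed immediately after the vm_map_copy header:
 *
 *	+------------------------+ <- copy
 *	| struct vm_map_copy     |
 *	+------------------------+ <- copy->cpy_kdata == (void *)(copy + 1)
 *	| len bytes of data      |
 *	+------------------------+
 *
 * so kalloc_size == sizeof(struct vm_map_copy) + len, as computed above.
 */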
6980
6981 /*
6982 * Routine: vm_map_copyout_kernel_buffer [internal use only]
6983 *
6984 * Description:
6985 * Copy out data from a kernel buffer into space in the
6986 * destination map. The space may be optionally dynamically
6987 * allocated.
6988 *
6989 * If successful, consumes the copy object.
6990 * Otherwise, the caller is responsible for it.
6991 */
6992 static int vm_map_copyout_kernel_buffer_failures = 0;
6993 static kern_return_t
6994 vm_map_copyout_kernel_buffer(
6995 vm_map_t map,
6996 vm_map_address_t *addr, /* IN/OUT */
6997 vm_map_copy_t copy,
6998 boolean_t overwrite)
6999 {
7000 kern_return_t kr = KERN_SUCCESS;
7001 thread_t thread = current_thread();
7002
7003 if (!overwrite) {
7004
7005 /*
7006 * Allocate space in the target map for the data
7007 */
7008 *addr = 0;
7009 kr = vm_map_enter(map,
7010 addr,
7011 vm_map_round_page(copy->size),
7012 (vm_map_offset_t) 0,
7013 VM_FLAGS_ANYWHERE,
7014 VM_OBJECT_NULL,
7015 (vm_object_offset_t) 0,
7016 FALSE,
7017 VM_PROT_DEFAULT,
7018 VM_PROT_ALL,
7019 VM_INHERIT_DEFAULT);
7020 if (kr != KERN_SUCCESS)
7021 return kr;
7022 }
7023
7024 /*
7025 * Copyout the data from the kernel buffer to the target map.
7026 */
7027 if (thread->map == map) {
7028
7029 /*
7030 * If the target map is the current map, just do
7031 * the copy.
7032 */
7033 assert((vm_size_t) copy->size == copy->size);
7034 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
7035 kr = KERN_INVALID_ADDRESS;
7036 }
7037 }
7038 else {
7039 vm_map_t oldmap;
7040
7041 /*
7042 * If the target map is another map, assume the
7043 * target's address space identity for the duration
7044 * of the copy.
7045 */
7046 vm_map_reference(map);
7047 oldmap = vm_map_switch(map);
7048
7049 assert((vm_size_t) copy->size == copy->size);
7050 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
7051 vm_map_copyout_kernel_buffer_failures++;
7052 kr = KERN_INVALID_ADDRESS;
7053 }
7054
7055 (void) vm_map_switch(oldmap);
7056 vm_map_deallocate(map);
7057 }
7058
7059 if (kr != KERN_SUCCESS) {
7060 /* the copy failed, clean up */
7061 if (!overwrite) {
7062 /*
7063 * Deallocate the space we allocated in the target map.
7064 */
7065 (void) vm_map_remove(map,
7066 vm_map_trunc_page(*addr),
7067 vm_map_round_page(*addr +
7068 vm_map_round_page(copy->size)),
7069 VM_MAP_NO_FLAGS);
7070 *addr = 0;
7071 }
7072 } else {
7073 /* copy was successful, discard the copy structure */
7074 kfree(copy, copy->cpy_kalloc_size);
7075 }
7076
7077 return kr;
7078 }
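/*
 * Example (sketch): the address-space switch idiom used above when the
 * copyout target is not the current map: temporarily assume the target
 * map's identity, perform the copyout, then switch back.  The helper
 * name and its "kbuf"/"uaddr"/"len" parameters are illustrative only.
 */
#if 0	/* illustrative sketch, not part of this file */
static kern_return_t
example_copyout_to_map(
	vm_map_t		map,
	void			*kbuf,
	vm_map_address_t	uaddr,
	vm_size_t		len)
{
	vm_map_t	oldmap;
	kern_return_t	kr = KERN_SUCCESS;

	vm_map_reference(map);		/* keep the map alive across the switch */
	oldmap = vm_map_switch(map);

	if (copyout(kbuf, uaddr, len))
		kr = KERN_INVALID_ADDRESS;

	(void) vm_map_switch(oldmap);	/* restore the original address space */
	vm_map_deallocate(map);
	return kr;
}
#endif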
7079
7080 /*
7081 * Macro: vm_map_copy_insert
7082 *
7083 * Description:
7084 * Link a copy chain ("copy") into a map at the
7085 * specified location (after "where").
7086 * Side effects:
7087 * The copy chain is destroyed.
7088 * Warning:
7089 * The arguments are evaluated multiple times.
7090 */
7091 #define vm_map_copy_insert(map, where, copy) \
7092 MACRO_BEGIN \
7093 vm_map_store_copy_insert(map, where, copy); \
7094 zfree(vm_map_copy_zone, copy); \
7095 MACRO_END
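/*
 * Example (sketch): because the macro arguments are expanded more than
 * once, callers should pass plain variables, as in the use later in
 * this file
 *
 *	vm_map_copy_insert(dst_map, last, copy);
 *
 * and never an expression with side effects, e.g.
 *
 *	vm_map_copy_insert(dst_map, last, fetch_next_copy());
 *
 * (fetch_next_copy() is a made-up name, shown only to illustrate the
 * hazard of double evaluation.)
 */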
7096
7097 /*
7098 * Routine: vm_map_copyout
7099 *
7100 * Description:
7101 * Copy out a copy chain ("copy") into newly-allocated
7102 * space in the destination map.
7103 *
7104 * If successful, consumes the copy object.
7105 * Otherwise, the caller is responsible for it.
7106 */
7107 kern_return_t
7108 vm_map_copyout(
7109 vm_map_t dst_map,
7110 vm_map_address_t *dst_addr, /* OUT */
7111 vm_map_copy_t copy)
7112 {
7113 vm_map_size_t size;
7114 vm_map_size_t adjustment;
7115 vm_map_offset_t start;
7116 vm_object_offset_t vm_copy_start;
7117 vm_map_entry_t last;
7118 register
7119 vm_map_entry_t entry;
7120
7121 /*
7122 * Check for null copy object.
7123 */
7124
7125 if (copy == VM_MAP_COPY_NULL) {
7126 *dst_addr = 0;
7127 return(KERN_SUCCESS);
7128 }
7129
7130 /*
7131 * Check for special copy object, created
7132 * by vm_map_copyin_object.
7133 */
7134
7135 if (copy->type == VM_MAP_COPY_OBJECT) {
7136 vm_object_t object = copy->cpy_object;
7137 kern_return_t kr;
7138 vm_object_offset_t offset;
7139
7140 offset = vm_object_trunc_page(copy->offset);
7141 size = vm_map_round_page(copy->size +
7142 (vm_map_size_t)(copy->offset - offset));
7143 *dst_addr = 0;
7144 kr = vm_map_enter(dst_map, dst_addr, size,
7145 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
7146 object, offset, FALSE,
7147 VM_PROT_DEFAULT, VM_PROT_ALL,
7148 VM_INHERIT_DEFAULT);
7149 if (kr != KERN_SUCCESS)
7150 return(kr);
7151 /* Account for non-page-aligned copy object */
7152 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
7153 zfree(vm_map_copy_zone, copy);
7154 return(KERN_SUCCESS);
7155 }
7156
7157 /*
7158 * Check for special kernel buffer allocated
7159 * by new_ipc_kmsg_copyin.
7160 */
7161
7162 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
7163 return(vm_map_copyout_kernel_buffer(dst_map, dst_addr,
7164 copy, FALSE));
7165 }
7166
7167 /*
7168 * Find space for the data
7169 */
7170
7171 vm_copy_start = vm_object_trunc_page(copy->offset);
7172 size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size)
7173 - vm_copy_start;
7174
7175 StartAgain: ;
7176
7177 vm_map_lock(dst_map);
7178 if( dst_map->disable_vmentry_reuse == TRUE) {
7179 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
7180 last = entry;
7181 } else {
7182 assert(first_free_is_valid(dst_map));
7183 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
7184 vm_map_min(dst_map) : last->vme_end;
7185 }
7186
7187 while (TRUE) {
7188 vm_map_entry_t next = last->vme_next;
7189 vm_map_offset_t end = start + size;
7190
7191 if ((end > dst_map->max_offset) || (end < start)) {
7192 if (dst_map->wait_for_space) {
7193 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
7194 assert_wait((event_t) dst_map,
7195 THREAD_INTERRUPTIBLE);
7196 vm_map_unlock(dst_map);
7197 thread_block(THREAD_CONTINUE_NULL);
7198 goto StartAgain;
7199 }
7200 }
7201 vm_map_unlock(dst_map);
7202 return(KERN_NO_SPACE);
7203 }
7204
7205 if ((next == vm_map_to_entry(dst_map)) ||
7206 (next->vme_start >= end))
7207 break;
7208
7209 last = next;
7210 start = last->vme_end;
7211 }
7212
7213 /*
7214 * Since we're going to just drop the map
7215 * entries from the copy into the destination
7216 * map, they must come from the same pool.
7217 */
7218
7219 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
7220 /*
7221 * Mismatches occur when dealing with the default
7222 * pager.
7223 */
7224 zone_t old_zone;
7225 vm_map_entry_t next, new;
7226
7227 /*
7228 * Find the zone that the copies were allocated from
7229 */
7230 old_zone = (copy->cpy_hdr.entries_pageable)
7231 ? vm_map_entry_zone
7232 : vm_map_kentry_zone;
7233 entry = vm_map_copy_first_entry(copy);
7234
7235 /*
7236 * Reinitialize the copy so that vm_map_copy_entry_link
7237 * will work.
7238 */
7239 vm_map_store_copy_reset(copy, entry);
7240 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
7241
7242 /*
7243 * Copy each entry.
7244 */
7245 while (entry != vm_map_copy_to_entry(copy)) {
7246 new = vm_map_copy_entry_create(copy);
7247 vm_map_entry_copy_full(new, entry);
7248 new->use_pmap = FALSE; /* clr address space specifics */
7249 vm_map_copy_entry_link(copy,
7250 vm_map_copy_last_entry(copy),
7251 new);
7252 next = entry->vme_next;
7253 zfree(old_zone, entry);
7254 entry = next;
7255 }
7256 }
7257
7258 /*
7259 * Adjust the addresses in the copy chain, and
7260 * reset the region attributes.
7261 */
7262
7263 adjustment = start - vm_copy_start;
7264 for (entry = vm_map_copy_first_entry(copy);
7265 entry != vm_map_copy_to_entry(copy);
7266 entry = entry->vme_next) {
7267 entry->vme_start += adjustment;
7268 entry->vme_end += adjustment;
7269
7270 entry->inheritance = VM_INHERIT_DEFAULT;
7271 entry->protection = VM_PROT_DEFAULT;
7272 entry->max_protection = VM_PROT_ALL;
7273 entry->behavior = VM_BEHAVIOR_DEFAULT;
7274
7275 /*
7276 * If the entry is now wired,
7277 * map the pages into the destination map.
7278 */
7279 if (entry->wired_count != 0) {
7280 register vm_map_offset_t va;
7281 vm_object_offset_t offset;
7282 register vm_object_t object;
7283 vm_prot_t prot;
7284 int type_of_fault;
7285
7286 object = entry->object.vm_object;
7287 offset = entry->offset;
7288 va = entry->vme_start;
7289
7290 pmap_pageable(dst_map->pmap,
7291 entry->vme_start,
7292 entry->vme_end,
7293 TRUE);
7294
7295 while (va < entry->vme_end) {
7296 register vm_page_t m;
7297
7298 /*
7299 * Look up the page in the object.
7300 * Assert that the page will be found in the
7301 * top object:
7302 * either
7303 * the object was newly created by
7304 * vm_object_copy_slowly, and has
7305 * copies of all of the pages from
7306 * the source object
7307 * or
7308 * the object was moved from the old
7309 * map entry; because the old map
7310 * entry was wired, all of the pages
7311 * were in the top-level object.
7312 * (XXX not true if we wire pages for
7313 * reading)
7314 */
7315 vm_object_lock(object);
7316
7317 m = vm_page_lookup(object, offset);
7318 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
7319 m->absent)
7320 panic("vm_map_copyout: wiring %p", m);
7321
7322 /*
7323 * ENCRYPTED SWAP:
7324 * The page is assumed to be wired here, so it
7325 * shouldn't be encrypted. Otherwise, we
7326 * couldn't enter it in the page table, since
7327 * we don't want the user to see the encrypted
7328 * data.
7329 */
7330 ASSERT_PAGE_DECRYPTED(m);
7331
7332 prot = entry->protection;
7333
7334 if (override_nx(dst_map, entry->alias) && prot)
7335 prot |= VM_PROT_EXECUTE;
7336
7337 type_of_fault = DBG_CACHE_HIT_FAULT;
7338
7339 vm_fault_enter(m, dst_map->pmap, va, prot, prot,
7340 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
7341 &type_of_fault);
7342
7343 vm_object_unlock(object);
7344
7345 offset += PAGE_SIZE_64;
7346 va += PAGE_SIZE;
7347 }
7348 }
7349 }
7350
7351 /*
7352 * Correct the page alignment for the result
7353 */
7354
7355 *dst_addr = start + (copy->offset - vm_copy_start);
7356
7357 /*
7358 * Update the hints and the map size
7359 */
7360
7361 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
7362
7363 dst_map->size += size;
7364
7365 /*
7366 * Link in the copy
7367 */
7368
7369 vm_map_copy_insert(dst_map, last, copy);
7370
7371 vm_map_unlock(dst_map);
7372
7373 /*
7374 * XXX If wiring_required, call vm_map_pageable
7375 */
7376
7377 return(KERN_SUCCESS);
7378 }
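/*
 * Worked example (sketch, assuming 4KB pages): for a copy with
 * copy->offset = 0x3e00 and copy->size = 0x2400:
 *
 *	vm_copy_start = trunc_page(0x3e00)                   = 0x3000
 *	size          = round_page(0x3e00 + 0x2400) - 0x3000 = 0x4000
 *
 * If the allocated range starts at 0x10000, the returned address is
 * *dst_addr = 0x10000 + (0x3e00 - 0x3000) = 0x10e00, which preserves
 * the copy's offset within its first page.
 */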
7379
7380 /*
7381 * Routine: vm_map_copyin
7382 *
7383 * Description:
7384 * see vm_map_copyin_common. Exported via Unsupported.exports.
7385 *
7386 */
7387
7388 #undef vm_map_copyin
7389
7390 kern_return_t
7391 vm_map_copyin(
7392 vm_map_t src_map,
7393 vm_map_address_t src_addr,
7394 vm_map_size_t len,
7395 boolean_t src_destroy,
7396 vm_map_copy_t *copy_result) /* OUT */
7397 {
7398 return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
7399 FALSE, copy_result, FALSE));
7400 }
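/*
 * Example (sketch): a hypothetical helper that copies "len" bytes at
 * "src_addr" in "src_map" into freshly allocated space in "dst_map",
 * using vm_map_copyin() above and vm_map_copyout() defined earlier in
 * this file.  The helper name is illustrative only.
 */
#if 0	/* illustrative sketch, not part of this file */
static kern_return_t
example_transfer_region(
	vm_map_t		src_map,
	vm_map_address_t	src_addr,
	vm_map_size_t		len,
	vm_map_t		dst_map,
	vm_map_address_t	*dst_addr)	/* OUT */
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	/* capture the source region; FALSE leaves the source mapping intact */
	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	/* place it at a kernel-chosen address in the destination map */
	kr = vm_map_copyout(dst_map, dst_addr, copy);
	if (kr != KERN_SUCCESS) {
		/* copyout did not consume the copy; release it ourselves */
		vm_map_copy_discard(copy);
	}
	return kr;
}
#endif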
7401
7402 /*
7403 * Routine: vm_map_copyin_common
7404 *
7405 * Description:
7406 * Copy the specified region (src_addr, len) from the
7407 * source address space (src_map), possibly removing
7408 * the region from the source address space (src_destroy).
7409 *
7410 * Returns:
7411 * A vm_map_copy_t object (copy_result), suitable for
7412 * insertion into another address space (using vm_map_copyout),
7413 * copying over another address space region (using
7414 * vm_map_copy_overwrite). If the copy is unused, it
7415 * should be destroyed (using vm_map_copy_discard).
7416 *
7417 * In/out conditions:
7418 * The source map should not be locked on entry.
7419 */
7420
7421 typedef struct submap_map {
7422 vm_map_t parent_map;
7423 vm_map_offset_t base_start;
7424 vm_map_offset_t base_end;
7425 vm_map_size_t base_len;
7426 struct submap_map *next;
7427 } submap_map_t;
7428
7429 kern_return_t
7430 vm_map_copyin_common(
7431 vm_map_t src_map,
7432 vm_map_address_t src_addr,
7433 vm_map_size_t len,
7434 boolean_t src_destroy,
7435 __unused boolean_t src_volatile,
7436 vm_map_copy_t *copy_result, /* OUT */
7437 boolean_t use_maxprot)
7438 {
7439 vm_map_entry_t tmp_entry; /* Result of last map lookup --
7440 * in multi-level lookup, this
7441 * entry contains the actual
7442 * vm_object/offset.
7443 */
7444 register
7445 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
7446
7447 vm_map_offset_t src_start; /* Start of current entry --
7448 * where copy is taking place now
7449 */
7450 vm_map_offset_t src_end; /* End of entire region to be
7451 * copied */
7452 vm_map_offset_t src_base;
7453 vm_map_t base_map = src_map;
7454 boolean_t map_share=FALSE;
7455 submap_map_t *parent_maps = NULL;
7456
7457 register
7458 vm_map_copy_t copy; /* Resulting copy */
7459 vm_map_address_t copy_addr;
7460
7461 /*
7462 * Check for copies of zero bytes.
7463 */
7464
7465 if (len == 0) {
7466 *copy_result = VM_MAP_COPY_NULL;
7467 return(KERN_SUCCESS);
7468 }
7469
7470 /*
7471 * Check that the end address doesn't overflow
7472 */
7473 src_end = src_addr + len;
7474 if (src_end < src_addr)
7475 return KERN_INVALID_ADDRESS;
7476
7477 /*
7478 * If the copy is sufficiently small, use a kernel buffer instead
7479 * of making a virtual copy. The theory being that the cost of
7480 * setting up VM (and taking C-O-W faults) dominates the copy costs
7481 * for small regions.
7482 */
7483 if ((len < msg_ool_size_small) && !use_maxprot)
7484 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
7485 src_destroy, copy_result);
7486
7487 /*
7488 * Compute (page aligned) start and end of region
7489 */
7490 src_start = vm_map_trunc_page(src_addr);
7491 src_end = vm_map_round_page(src_end);
7492
7493 XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);
7494
7495 /*
7496 * Allocate a header element for the list.
7497 *
7498 * Use the start and end in the header to
7499 * remember the endpoints prior to rounding.
7500 */
7501
7502 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7503 vm_map_copy_first_entry(copy) =
7504 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
7505 copy->type = VM_MAP_COPY_ENTRY_LIST;
7506 copy->cpy_hdr.nentries = 0;
7507 copy->cpy_hdr.entries_pageable = TRUE;
7508
7509 vm_map_store_init( &(copy->cpy_hdr) );
7510
7511 copy->offset = src_addr;
7512 copy->size = len;
7513
7514 new_entry = vm_map_copy_entry_create(copy);
7515
7516 #define RETURN(x) \
7517 MACRO_BEGIN \
7518 vm_map_unlock(src_map); \
7519 if(src_map != base_map) \
7520 vm_map_deallocate(src_map); \
7521 if (new_entry != VM_MAP_ENTRY_NULL) \
7522 vm_map_copy_entry_dispose(copy,new_entry); \
7523 vm_map_copy_discard(copy); \
7524 { \
7525 submap_map_t *_ptr; \
7526 \
7527 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
7528 parent_maps=parent_maps->next; \
7529 if (_ptr->parent_map != base_map) \
7530 vm_map_deallocate(_ptr->parent_map); \
7531 kfree(_ptr, sizeof(submap_map_t)); \
7532 } \
7533 } \
7534 MACRO_RETURN(x); \
7535 MACRO_END
7536
7537 /*
7538 * Find the beginning of the region.
7539 */
7540
7541 vm_map_lock(src_map);
7542
7543 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry))
7544 RETURN(KERN_INVALID_ADDRESS);
7545 if(!tmp_entry->is_sub_map) {
7546 vm_map_clip_start(src_map, tmp_entry, src_start);
7547 }
7548 /* set for later submap fix-up */
7549 copy_addr = src_start;
7550
7551 /*
7552 * Go through entries until we get to the end.
7553 */
7554
7555 while (TRUE) {
7556 register
7557 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
7558 vm_map_size_t src_size; /* Size of source
7559 * map entry (in both
7560 * maps)
7561 */
7562
7563 register
7564 vm_object_t src_object; /* Object to copy */
7565 vm_object_offset_t src_offset;
7566
7567 boolean_t src_needs_copy; /* Should source map
7568 * be made read-only
7569 * for copy-on-write?
7570 */
7571
7572 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
7573
7574 boolean_t was_wired; /* Was source wired? */
7575 vm_map_version_t version; /* Version before locks
7576 * dropped to make copy
7577 */
7578 kern_return_t result; /* Return value from
7579 * copy_strategically.
7580 */
7581 while(tmp_entry->is_sub_map) {
7582 vm_map_size_t submap_len;
7583 submap_map_t *ptr;
7584
7585 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
7586 ptr->next = parent_maps;
7587 parent_maps = ptr;
7588 ptr->parent_map = src_map;
7589 ptr->base_start = src_start;
7590 ptr->base_end = src_end;
7591 submap_len = tmp_entry->vme_end - src_start;
7592 if(submap_len > (src_end-src_start))
7593 submap_len = src_end-src_start;
7594 ptr->base_len = submap_len;
7595
7596 src_start -= tmp_entry->vme_start;
7597 src_start += tmp_entry->offset;
7598 src_end = src_start + submap_len;
7599 src_map = tmp_entry->object.sub_map;
7600 vm_map_lock(src_map);
7601 /* keep an outstanding reference for all maps in */
7602 /* the parents tree except the base map */
7603 vm_map_reference(src_map);
7604 vm_map_unlock(ptr->parent_map);
7605 if (!vm_map_lookup_entry(
7606 src_map, src_start, &tmp_entry))
7607 RETURN(KERN_INVALID_ADDRESS);
7608 map_share = TRUE;
7609 if(!tmp_entry->is_sub_map)
7610 vm_map_clip_start(src_map, tmp_entry, src_start);
7611 src_entry = tmp_entry;
7612 }
7613 /* we are now in the lowest level submap... */
7614
7615 if ((tmp_entry->object.vm_object != VM_OBJECT_NULL) &&
7616 (tmp_entry->object.vm_object->phys_contiguous)) {
7617 /* This is not supported for now. In future */
7618 /* we will need to detect the phys_contig */
7619 /* condition and then upgrade copy_slowly */
7620 /* to do physical copy from the device mem */
7621 /* based object. We can piggy-back off of */
7622 /* the was_wired boolean to set up the */
7623 /* proper handling */
7624 RETURN(KERN_PROTECTION_FAILURE);
7625 }
7626 /*
7627 * Create a new address map entry to hold the result.
7628 * Fill in the fields from the appropriate source entries.
7629 * We must unlock the source map to do this if we need
7630 * to allocate a map entry.
7631 */
7632 if (new_entry == VM_MAP_ENTRY_NULL) {
7633 version.main_timestamp = src_map->timestamp;
7634 vm_map_unlock(src_map);
7635
7636 new_entry = vm_map_copy_entry_create(copy);
7637
7638 vm_map_lock(src_map);
7639 if ((version.main_timestamp + 1) != src_map->timestamp) {
7640 if (!vm_map_lookup_entry(src_map, src_start,
7641 &tmp_entry)) {
7642 RETURN(KERN_INVALID_ADDRESS);
7643 }
7644 if (!tmp_entry->is_sub_map)
7645 vm_map_clip_start(src_map, tmp_entry, src_start);
7646 continue; /* restart w/ new tmp_entry */
7647 }
7648 }
7649
7650 /*
7651 * Verify that the region can be read.
7652 */
7653 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
7654 !use_maxprot) ||
7655 (src_entry->max_protection & VM_PROT_READ) == 0)
7656 RETURN(KERN_PROTECTION_FAILURE);
7657
7658 /*
7659 * Clip against the endpoints of the entire region.
7660 */
7661
7662 vm_map_clip_end(src_map, src_entry, src_end);
7663
7664 src_size = src_entry->vme_end - src_start;
7665 src_object = src_entry->object.vm_object;
7666 src_offset = src_entry->offset;
7667 was_wired = (src_entry->wired_count != 0);
7668
7669 vm_map_entry_copy(new_entry, src_entry);
7670 new_entry->use_pmap = FALSE; /* clr address space specifics */
7671
7672 /*
7673 * Attempt non-blocking copy-on-write optimizations.
7674 */
7675
7676 if (src_destroy &&
7677 (src_object == VM_OBJECT_NULL ||
7678 (src_object->internal && !src_object->true_share
7679 && !map_share))) {
7680 /*
7681 * If we are destroying the source, and the object
7682 * is internal, we can move the object reference
7683 * from the source to the copy. The copy is
7684 * copy-on-write only if the source is.
7685 * We make another reference to the object, because
7686 * destroying the source entry will deallocate it.
7687 */
7688 vm_object_reference(src_object);
7689
7690 /*
7691 * Copy is always unwired. vm_map_copy_entry
7692 * set its wired count to zero.
7693 */
7694
7695 goto CopySuccessful;
7696 }
7697
7698
7699 RestartCopy:
7700 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
7701 src_object, new_entry, new_entry->object.vm_object,
7702 was_wired, 0);
7703 if ((src_object == VM_OBJECT_NULL ||
7704 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
7705 vm_object_copy_quickly(
7706 &new_entry->object.vm_object,
7707 src_offset,
7708 src_size,
7709 &src_needs_copy,
7710 &new_entry_needs_copy)) {
7711
7712 new_entry->needs_copy = new_entry_needs_copy;
7713
7714 /*
7715 * Handle copy-on-write obligations
7716 */
7717
7718 if (src_needs_copy && !tmp_entry->needs_copy) {
7719 vm_prot_t prot;
7720
7721 prot = src_entry->protection & ~VM_PROT_WRITE;
7722
7723 if (override_nx(src_map, src_entry->alias) && prot)
7724 prot |= VM_PROT_EXECUTE;
7725
7726 vm_object_pmap_protect(
7727 src_object,
7728 src_offset,
7729 src_size,
7730 (src_entry->is_shared ?
7731 PMAP_NULL
7732 : src_map->pmap),
7733 src_entry->vme_start,
7734 prot);
7735
7736 tmp_entry->needs_copy = TRUE;
7737 }
7738
7739 /*
7740 * The map has never been unlocked, so it's safe
7741 * to move to the next entry rather than doing
7742 * another lookup.
7743 */
7744
7745 goto CopySuccessful;
7746 }
7747
7748 /*
7749 * Take an object reference, so that we may
7750 * release the map lock(s).
7751 */
7752
7753 assert(src_object != VM_OBJECT_NULL);
7754 vm_object_reference(src_object);
7755
7756 /*
7757 * Record the timestamp for later verification.
7758 * Unlock the map.
7759 */
7760
7761 version.main_timestamp = src_map->timestamp;
7762 vm_map_unlock(src_map); /* Increments timestamp once! */
7763
7764 /*
7765 * Perform the copy
7766 */
7767
7768 if (was_wired) {
7769 CopySlowly:
7770 vm_object_lock(src_object);
7771 result = vm_object_copy_slowly(
7772 src_object,
7773 src_offset,
7774 src_size,
7775 THREAD_UNINT,
7776 &new_entry->object.vm_object);
7777 new_entry->offset = 0;
7778 new_entry->needs_copy = FALSE;
7779
7780 }
7781 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
7782 (tmp_entry->is_shared || map_share)) {
7783 vm_object_t new_object;
7784
7785 vm_object_lock_shared(src_object);
7786 new_object = vm_object_copy_delayed(
7787 src_object,
7788 src_offset,
7789 src_size,
7790 TRUE);
7791 if (new_object == VM_OBJECT_NULL)
7792 goto CopySlowly;
7793
7794 new_entry->object.vm_object = new_object;
7795 new_entry->needs_copy = TRUE;
7796 result = KERN_SUCCESS;
7797
7798 } else {
7799 result = vm_object_copy_strategically(src_object,
7800 src_offset,
7801 src_size,
7802 &new_entry->object.vm_object,
7803 &new_entry->offset,
7804 &new_entry_needs_copy);
7805
7806 new_entry->needs_copy = new_entry_needs_copy;
7807 }
7808
7809 if (result != KERN_SUCCESS &&
7810 result != KERN_MEMORY_RESTART_COPY) {
7811 vm_map_lock(src_map);
7812 RETURN(result);
7813 }
7814
7815 /*
7816 * Throw away the extra reference
7817 */
7818
7819 vm_object_deallocate(src_object);
7820
7821 /*
7822 * Verify that the map has not substantially
7823 * changed while the copy was being made.
7824 */
7825
7826 vm_map_lock(src_map);
7827
7828 if ((version.main_timestamp + 1) == src_map->timestamp)
7829 goto VerificationSuccessful;
7830
7831 /*
7832 * Simple version comparison failed.
7833 *
7834 * Retry the lookup and verify that the
7835 * same object/offset are still present.
7836 *
7837 * [Note: a memory manager that colludes with
7838 * the calling task can detect that we have
7839 * cheated. While the map was unlocked, the
7840 * mapping could have been changed and restored.]
7841 */
7842
7843 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
7844 RETURN(KERN_INVALID_ADDRESS);
7845 }
7846
7847 src_entry = tmp_entry;
7848 vm_map_clip_start(src_map, src_entry, src_start);
7849
7850 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
7851 !use_maxprot) ||
7852 ((src_entry->max_protection & VM_PROT_READ) == 0))
7853 goto VerificationFailed;
7854
7855 if (src_entry->vme_end < new_entry->vme_end)
7856 src_size = (new_entry->vme_end = src_entry->vme_end) - src_start;
7857
7858 if ((src_entry->object.vm_object != src_object) ||
7859 (src_entry->offset != src_offset) ) {
7860
7861 /*
7862 * Verification failed.
7863 *
7864 * Start over with this top-level entry.
7865 */
7866
7867 VerificationFailed: ;
7868
7869 vm_object_deallocate(new_entry->object.vm_object);
7870 tmp_entry = src_entry;
7871 continue;
7872 }
7873
7874 /*
7875 * Verification succeeded.
7876 */
7877
7878 VerificationSuccessful: ;
7879
7880 if (result == KERN_MEMORY_RESTART_COPY)
7881 goto RestartCopy;
7882
7883 /*
7884 * Copy succeeded.
7885 */
7886
7887 CopySuccessful: ;
7888
7889 /*
7890 * Link in the new copy entry.
7891 */
7892
7893 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
7894 new_entry);
7895
7896 /*
7897 * Determine whether the entire region
7898 * has been copied.
7899 */
7900 src_base = src_start;
7901 src_start = new_entry->vme_end;
7902 new_entry = VM_MAP_ENTRY_NULL;
7903 while ((src_start >= src_end) && (src_end != 0)) {
7904 if (src_map != base_map) {
7905 submap_map_t *ptr;
7906
7907 ptr = parent_maps;
7908 assert(ptr != NULL);
7909 parent_maps = parent_maps->next;
7910
7911 /* fix up the damage we did in that submap */
7912 vm_map_simplify_range(src_map,
7913 src_base,
7914 src_end);
7915
7916 vm_map_unlock(src_map);
7917 vm_map_deallocate(src_map);
7918 vm_map_lock(ptr->parent_map);
7919 src_map = ptr->parent_map;
7920 src_base = ptr->base_start;
7921 src_start = ptr->base_start + ptr->base_len;
7922 src_end = ptr->base_end;
7923 if ((src_end > src_start) &&
7924 !vm_map_lookup_entry(
7925 src_map, src_start, &tmp_entry))
7926 RETURN(KERN_INVALID_ADDRESS);
7927 kfree(ptr, sizeof(submap_map_t));
7928 if(parent_maps == NULL)
7929 map_share = FALSE;
7930 src_entry = tmp_entry->vme_prev;
7931 } else
7932 break;
7933 }
7934 if ((src_start >= src_end) && (src_end != 0))
7935 break;
7936
7937 /*
7938 * Verify that there are no gaps in the region
7939 */
7940
7941 tmp_entry = src_entry->vme_next;
7942 if ((tmp_entry->vme_start != src_start) ||
7943 (tmp_entry == vm_map_to_entry(src_map)))
7944 RETURN(KERN_INVALID_ADDRESS);
7945 }
7946
7947 /*
7948 * If the source should be destroyed, do it now, since the
7949 * copy was successful.
7950 */
7951 if (src_destroy) {
7952 (void) vm_map_delete(src_map,
7953 vm_map_trunc_page(src_addr),
7954 src_end,
7955 (src_map == kernel_map) ?
7956 VM_MAP_REMOVE_KUNWIRE :
7957 VM_MAP_NO_FLAGS,
7958 VM_MAP_NULL);
7959 } else {
7960 /* fix up the damage we did in the base map */
7961 vm_map_simplify_range(src_map,
7962 vm_map_trunc_page(src_addr),
7963 vm_map_round_page(src_end));
7964 }
7965
7966 vm_map_unlock(src_map);
7967
7968 /* Fix-up start and end points in copy. This is necessary */
7969 /* when the various entries in the copy object were picked */
7970 /* up from different sub-maps */
7971
7972 tmp_entry = vm_map_copy_first_entry(copy);
7973 while (tmp_entry != vm_map_copy_to_entry(copy)) {
7974 tmp_entry->vme_end = copy_addr +
7975 (tmp_entry->vme_end - tmp_entry->vme_start);
7976 tmp_entry->vme_start = copy_addr;
7977 assert(tmp_entry->vme_start < tmp_entry->vme_end);
7978 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
7979 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
7980 }
7981
7982 *copy_result = copy;
7983 return(KERN_SUCCESS);
7984
7985 #undef RETURN
7986 }
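/*
 * Example (sketch): depending on the size of the request, callers of
 * vm_map_copyin_common() receive different copy types (a kernel buffer
 * for small, non-maxprot copies, an entry list otherwise) and may want
 * to distinguish them.  The helper name below is hypothetical.
 */
#if 0	/* illustrative sketch, not part of this file */
static const char *
example_copy_kind(
	vm_map_copy_t	copy)
{
	if (copy == VM_MAP_COPY_NULL)
		return "null (zero-length copyin)";
	switch (copy->type) {
	case VM_MAP_COPY_KERNEL_BUFFER:
		/* len < msg_ool_size_small and !use_maxprot */
		return "kernel buffer";
	case VM_MAP_COPY_ENTRY_LIST:
		return "entry list";
	case VM_MAP_COPY_OBJECT:
		return "object";
	default:
		return "unknown";
	}
}
#endif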
7987
7988 /*
7989 * vm_map_copyin_object:
7990 *
7991 * Create a copy object from an object.
7992 * Our caller donates an object reference.
7993 */
7994
7995 kern_return_t
7996 vm_map_copyin_object(
7997 vm_object_t object,
7998 vm_object_offset_t offset, /* offset of region in object */
7999 vm_object_size_t size, /* size of region in object */
8000 vm_map_copy_t *copy_result) /* OUT */
8001 {
8002 vm_map_copy_t copy; /* Resulting copy */
8003
8004 /*
8005 * We drop the object into a special copy object
8006 * that contains the object directly.
8007 */
8008
8009 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8010 copy->type = VM_MAP_COPY_OBJECT;
8011 copy->cpy_object = object;
8012 copy->offset = offset;
8013 copy->size = size;
8014
8015 *copy_result = copy;
8016 return(KERN_SUCCESS);
8017 }
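/*
 * Example (sketch): wrapping a freshly allocated VM object in a copy
 * object and mapping it into a destination map.  vm_map_copyin_object()
 * takes over the object reference donated by the caller, and
 * vm_map_copyout() handles the resulting VM_MAP_COPY_OBJECT copy type
 * directly (see above).  The helper name is hypothetical.
 */
#if 0	/* illustrative sketch, not part of this file */
static kern_return_t
example_map_new_object(
	vm_map_t		dst_map,
	vm_object_size_t	size,
	vm_map_address_t	*dst_addr)	/* OUT */
{
	vm_object_t	object;
	vm_map_copy_t	copy;
	kern_return_t	kr;

	object = vm_object_allocate((vm_map_size_t) size);

	/* donates our reference on "object" to the copy */
	kr = vm_map_copyin_object(object, 0, size, &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	kr = vm_map_copyout(dst_map, dst_addr, copy);
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(copy);
	return kr;
}
#endif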
8018
8019 static void
8020 vm_map_fork_share(
8021 vm_map_t old_map,
8022 vm_map_entry_t old_entry,
8023 vm_map_t new_map)
8024 {
8025 vm_object_t object;
8026 vm_map_entry_t new_entry;
8027
8028 /*
8029 * New sharing code. New map entry
8030 * references original object. Internal
8031 * objects use asynchronous copy algorithm for
8032 * future copies. First make sure we have
8033 * the right object. If we need a shadow,
8034 * or someone else already has one, then
8035 * make a new shadow and share it.
8036 */
8037
8038 object = old_entry->object.vm_object;
8039 if (old_entry->is_sub_map) {
8040 assert(old_entry->wired_count == 0);
8041 #ifndef NO_NESTED_PMAP
8042 if(old_entry->use_pmap) {
8043 kern_return_t result;
8044
8045 result = pmap_nest(new_map->pmap,
8046 (old_entry->object.sub_map)->pmap,
8047 (addr64_t)old_entry->vme_start,
8048 (addr64_t)old_entry->vme_start,
8049 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
8050 if(result)
8051 panic("vm_map_fork_share: pmap_nest failed!");
8052 }
8053 #endif /* NO_NESTED_PMAP */
8054 } else if (object == VM_OBJECT_NULL) {
8055 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
8056 old_entry->vme_start));
8057 old_entry->offset = 0;
8058 old_entry->object.vm_object = object;
8059 assert(!old_entry->needs_copy);
8060 } else if (object->copy_strategy !=
8061 MEMORY_OBJECT_COPY_SYMMETRIC) {
8062
8063 /*
8064 * We are already using an asymmetric
8065 * copy, and therefore we already have
8066 * the right object.
8067 */
8068
8069 assert(! old_entry->needs_copy);
8070 }
8071 else if (old_entry->needs_copy || /* case 1 */
8072 object->shadowed || /* case 2 */
8073 (!object->true_share && /* case 3 */
8074 !old_entry->is_shared &&
8075 (object->vo_size >
8076 (vm_map_size_t)(old_entry->vme_end -
8077 old_entry->vme_start)))) {
8078
8079 /*
8080 * We need to create a shadow.
8081 * There are three cases here.
8082 * In the first case, we need to
8083 * complete a deferred symmetrical
8084 * copy that we participated in.
8085 * In the second and third cases,
8086 * we need to create the shadow so
8087 * that changes that we make to the
8088 * object do not interfere with
8089 * any symmetrical copies which
8090 * have occurred (case 2) or which
8091 * might occur (case 3).
8092 *
8093 * The first case is when we had
8094 * deferred shadow object creation
8095 * via the entry->needs_copy mechanism.
8096 * This mechanism only works when
8097 * only one entry points to the source
8098 * object, and we are about to create
8099 * a second entry pointing to the
8100 * same object. The problem is that
8101 * there is no way of mapping from
8102 * an object to the entries pointing
8103 * to it. (Deferred shadow creation
8104 * works with one entry because it occurs
8105 * at fault time, and we walk from the
8106 * entry to the object when handling
8107 * the fault.)
8108 *
8109 * The second case is when the object
8110 * to be shared has already been copied
8111 * with a symmetric copy, but we point
8112 * directly to the object without
8113 * needs_copy set in our entry. (This
8114 * can happen because different ranges
8115 * of an object can be pointed to by
8116 * different entries. In particular,
8117 * a single entry pointing to an object
8118 * can be split by a call to vm_inherit,
8119 * which, combined with task_create, can
8120 * result in the different entries
8121 * having different needs_copy values.)
8122 * The shadowed flag in the object allows
8123 * us to detect this case. The problem
8124 * with this case is that if this object
8125 * has or will have shadows, then we
8126 * must not perform an asymmetric copy
8127 * of this object, since such a copy
8128 * allows the object to be changed, which
8129 * will break the previous symmetrical
8130 * copies (which rely upon the object
8131 * not changing). In a sense, the shadowed
8132 * flag says "don't change this object".
8133 * We fix this by creating a shadow
8134 * object for this object, and sharing
8135 * that. This works because we are free
8136 * to change the shadow object (and thus
8137 * to use an asymmetric copy strategy);
8138 * this is also semantically correct,
8139 * since this object is temporary, and
8140 * therefore a copy of the object is
8141 * as good as the object itself. (This
8142 * is not true for permanent objects,
8143 * since the pager needs to see changes,
8144 * which won't happen if the changes
8145 * are made to a copy.)
8146 *
8147 * The third case is when the object
8148 * to be shared has parts sticking
8149 * outside of the entry we're working
8150 * with, and thus may in the future
8151 * be subject to a symmetrical copy.
8152 * (This is a preemptive version of
8153 * case 2.)
8154 */
8155 vm_object_shadow(&old_entry->object.vm_object,
8156 &old_entry->offset,
8157 (vm_map_size_t) (old_entry->vme_end -
8158 old_entry->vme_start));
8159
8160 /*
8161 * If we're making a shadow for other than
8162 * copy on write reasons, then we have
8163 * to remove write permission.
8164 */
8165
8166 if (!old_entry->needs_copy &&
8167 (old_entry->protection & VM_PROT_WRITE)) {
8168 vm_prot_t prot;
8169
8170 prot = old_entry->protection & ~VM_PROT_WRITE;
8171
8172 if (override_nx(old_map, old_entry->alias) && prot)
8173 prot |= VM_PROT_EXECUTE;
8174
8175 if (old_map->mapped) {
8176 vm_object_pmap_protect(
8177 old_entry->object.vm_object,
8178 old_entry->offset,
8179 (old_entry->vme_end -
8180 old_entry->vme_start),
8181 PMAP_NULL,
8182 old_entry->vme_start,
8183 prot);
8184 } else {
8185 pmap_protect(old_map->pmap,
8186 old_entry->vme_start,
8187 old_entry->vme_end,
8188 prot);
8189 }
8190 }
8191
8192 old_entry->needs_copy = FALSE;
8193 object = old_entry->object.vm_object;
8194 }
8195
8196
8197 /*
8198 * If object was using a symmetric copy strategy,
8199 * change its copy strategy to the default
8200 * asymmetric copy strategy, which is copy_delay
8201 * in the non-norma case and copy_call in the
8202 * norma case. Bump the reference count for the
8203 * new entry.
8204 */
8205
8206 if(old_entry->is_sub_map) {
8207 vm_map_lock(old_entry->object.sub_map);
8208 vm_map_reference(old_entry->object.sub_map);
8209 vm_map_unlock(old_entry->object.sub_map);
8210 } else {
8211 vm_object_lock(object);
8212 vm_object_reference_locked(object);
8213 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
8214 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
8215 }
8216 vm_object_unlock(object);
8217 }
8218
8219 /*
8220 * Clone the entry, using object ref from above.
8221 * Mark both entries as shared.
8222 */
8223
8224 new_entry = vm_map_entry_create(new_map);
8225 vm_map_entry_copy(new_entry, old_entry);
8226 old_entry->is_shared = TRUE;
8227 new_entry->is_shared = TRUE;
8228
8229 /*
8230 * Insert the entry into the new map -- we
8231 * know we're inserting at the end of the new
8232 * map.
8233 */
8234
8235 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
8236
8237 /*
8238 * Update the physical map
8239 */
8240
8241 if (old_entry->is_sub_map) {
8242 /* Bill Angell pmap support goes here */
8243 } else {
8244 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
8245 old_entry->vme_end - old_entry->vme_start,
8246 old_entry->vme_start);
8247 }
8248 }
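
/*
 * Worked example for "case 3" in the comment above: suppose a 1 MB
 * internal object is mapped by a single 256 KB entry inherited with
 * VM_INHERIT_SHARE.  The object is neither true_share nor is_shared,
 * but vo_size (1 MB) exceeds the entry's size (256 KB), so the parts
 * of the object outside this entry could still become the target of a
 * future symmetric copy.  vm_map_fork_share() therefore shadows the
 * object before sharing it, and the shadow is what both the parent and
 * the child end up mapping.
 */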
8249
8250 static boolean_t
8251 vm_map_fork_copy(
8252 vm_map_t old_map,
8253 vm_map_entry_t *old_entry_p,
8254 vm_map_t new_map)
8255 {
8256 vm_map_entry_t old_entry = *old_entry_p;
8257 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
8258 vm_map_offset_t start = old_entry->vme_start;
8259 vm_map_copy_t copy;
8260 vm_map_entry_t last = vm_map_last_entry(new_map);
8261
8262 vm_map_unlock(old_map);
8263 /*
8264 * Use maxprot version of copyin because we
8265 * care about whether this memory can ever
8266 * be accessed, not just whether it's accessible
8267 * right now.
8268 */
8269 if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, &copy)
8270 != KERN_SUCCESS) {
8271 /*
8272 * The map might have changed while it
8273 * was unlocked, check it again. Skip
8274 * any blank space or permanently
8275 * unreadable region.
8276 */
8277 vm_map_lock(old_map);
8278 if (!vm_map_lookup_entry(old_map, start, &last) ||
8279 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
8280 last = last->vme_next;
8281 }
8282 *old_entry_p = last;
8283
8284 /*
8285 * XXX For some error returns, want to
8286 * XXX skip to the next element. Note
8287 * that INVALID_ADDRESS and
8288 * PROTECTION_FAILURE are handled above.
8289 */
8290
8291 return FALSE;
8292 }
8293
8294 /*
8295 * Insert the copy into the new map
8296 */
8297
8298 vm_map_copy_insert(new_map, last, copy);
8299
8300 /*
8301 * Pick up the traversal at the end of
8302 * the copied region.
8303 */
8304
8305 vm_map_lock(old_map);
8306 start += entry_size;
8307 if (! vm_map_lookup_entry(old_map, start, &last)) {
8308 last = last->vme_next;
8309 } else {
8310 if (last->vme_start == start) {
8311 /*
8312 * No need to clip here and we don't
8313 * want to cause any unnecessary
8314 * unnesting...
8315 */
8316 } else {
8317 vm_map_clip_start(old_map, last, start);
8318 }
8319 }
8320 *old_entry_p = last;
8321
8322 return TRUE;
8323 }
8324
8325 /*
8326 * vm_map_fork:
8327 *
8328 * Create and return a new map based on the old
8329 * map, according to the inheritance values on the
8330 * regions in that map.
8331 *
8332 * The source map must not be locked.
8333 */
8334 vm_map_t
8335 vm_map_fork(
8336 vm_map_t old_map)
8337 {
8338 pmap_t new_pmap;
8339 vm_map_t new_map;
8340 vm_map_entry_t old_entry;
8341 vm_map_size_t new_size = 0, entry_size;
8342 vm_map_entry_t new_entry;
8343 boolean_t src_needs_copy;
8344 boolean_t new_entry_needs_copy;
8345
8346 new_pmap = pmap_create((vm_map_size_t) 0,
8347 #if defined(__i386__) || defined(__x86_64__)
8348 old_map->pmap->pm_task_map != TASK_MAP_32BIT
8349 #else
8350 0
8351 #endif
8352 );
8353 #if defined(__i386__)
8354 if (old_map->pmap->pm_task_map == TASK_MAP_64BIT_SHARED)
8355 pmap_set_4GB_pagezero(new_pmap);
8356 #endif
8357
8358 vm_map_reference_swap(old_map);
8359 vm_map_lock(old_map);
8360
8361 new_map = vm_map_create(new_pmap,
8362 old_map->min_offset,
8363 old_map->max_offset,
8364 old_map->hdr.entries_pageable);
8365 for (
8366 old_entry = vm_map_first_entry(old_map);
8367 old_entry != vm_map_to_entry(old_map);
8368 ) {
8369
8370 entry_size = old_entry->vme_end - old_entry->vme_start;
8371
8372 switch (old_entry->inheritance) {
8373 case VM_INHERIT_NONE:
8374 break;
8375
8376 case VM_INHERIT_SHARE:
8377 vm_map_fork_share(old_map, old_entry, new_map);
8378 new_size += entry_size;
8379 break;
8380
8381 case VM_INHERIT_COPY:
8382
8383 /*
8384 * Inline the copy_quickly case;
8385 * upon failure, fall back on call
8386 * to vm_map_fork_copy.
8387 */
8388
8389 if(old_entry->is_sub_map)
8390 break;
8391 if ((old_entry->wired_count != 0) ||
8392 ((old_entry->object.vm_object != NULL) &&
8393 (old_entry->object.vm_object->true_share))) {
8394 goto slow_vm_map_fork_copy;
8395 }
8396
8397 new_entry = vm_map_entry_create(new_map);
8398 vm_map_entry_copy(new_entry, old_entry);
8399 /* clear address space specifics */
8400 new_entry->use_pmap = FALSE;
8401
8402 if (! vm_object_copy_quickly(
8403 &new_entry->object.vm_object,
8404 old_entry->offset,
8405 (old_entry->vme_end -
8406 old_entry->vme_start),
8407 &src_needs_copy,
8408 &new_entry_needs_copy)) {
8409 vm_map_entry_dispose(new_map, new_entry);
8410 goto slow_vm_map_fork_copy;
8411 }
8412
8413 /*
8414 * Handle copy-on-write obligations
8415 */
8416
8417 if (src_needs_copy && !old_entry->needs_copy) {
8418 vm_prot_t prot;
8419
8420 prot = old_entry->protection & ~VM_PROT_WRITE;
8421
8422 if (override_nx(old_map, old_entry->alias) && prot)
8423 prot |= VM_PROT_EXECUTE;
8424
8425 vm_object_pmap_protect(
8426 old_entry->object.vm_object,
8427 old_entry->offset,
8428 (old_entry->vme_end -
8429 old_entry->vme_start),
8430 ((old_entry->is_shared
8431 || old_map->mapped)
8432 ? PMAP_NULL :
8433 old_map->pmap),
8434 old_entry->vme_start,
8435 prot);
8436
8437 old_entry->needs_copy = TRUE;
8438 }
8439 new_entry->needs_copy = new_entry_needs_copy;
8440
8441 /*
8442 * Insert the entry at the end
8443 * of the map.
8444 */
8445
8446 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map),
8447 new_entry);
8448 new_size += entry_size;
8449 break;
8450
8451 slow_vm_map_fork_copy:
8452 if (vm_map_fork_copy(old_map, &old_entry, new_map)) {
8453 new_size += entry_size;
8454 }
8455 continue;
8456 }
8457 old_entry = old_entry->vme_next;
8458 }
8459
8460 new_map->size = new_size;
8461 vm_map_unlock(old_map);
8462 vm_map_deallocate(old_map);
8463
8464 return(new_map);
8465 }
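
/*
 * Hedged usage sketch (hypothetical task pointers, not compiled): a
 * fork-style task duplication hands the parent's map to vm_map_fork()
 * and installs the result in the child.
 */
#if 0	/* example only */
	vm_map_t	child_map;

	child_map = vm_map_fork(parent_task->map);	/* takes its own refs */
	child_task->map = child_map;
#endif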
8466
8467 /*
8468 * vm_map_exec:
8469 *
8470 * Setup the "new_map" with the proper execution environment according
8471 * to the type of executable (platform, 64bit, chroot environment).
8472 * Map the comm page and shared region, etc...
8473 */
8474 kern_return_t
8475 vm_map_exec(
8476 vm_map_t new_map,
8477 task_t task,
8478 void *fsroot,
8479 cpu_type_t cpu)
8480 {
8481 SHARED_REGION_TRACE_DEBUG(
8482 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
8483 current_task(), new_map, task, fsroot, cpu));
8484 (void) vm_commpage_enter(new_map, task);
8485 (void) vm_shared_region_enter(new_map, task, fsroot, cpu);
8486 SHARED_REGION_TRACE_DEBUG(
8487 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
8488 current_task(), new_map, task, fsroot, cpu));
8489 return KERN_SUCCESS;
8490 }
8491
8492 /*
8493 * vm_map_lookup_locked:
8494 *
8495 * Finds the VM object, offset, and
8496 * protection for a given virtual address in the
8497 * specified map, assuming a page fault of the
8498 * type specified.
8499 *
8500 * Returns the (object, offset, protection) for
8501 * this address, whether it is wired down, and whether
8502 * this map has the only reference to the data in question.
8503 * In order to later verify this lookup, a "version"
8504 * is returned.
8505 *
8506 * The map MUST be locked by the caller and WILL be
8507 * locked on exit. In order to guarantee the
8508 * existence of the returned object, it is returned
8509 * locked.
8510 *
8511 * If a lookup is requested with "write protection"
8512 * specified, the map may be changed to perform virtual
8513 * copying operations, although the data referenced will
8514 * remain the same.
8515 */
8516 kern_return_t
8517 vm_map_lookup_locked(
8518 vm_map_t *var_map, /* IN/OUT */
8519 vm_map_offset_t vaddr,
8520 vm_prot_t fault_type,
8521 int object_lock_type,
8522 vm_map_version_t *out_version, /* OUT */
8523 vm_object_t *object, /* OUT */
8524 vm_object_offset_t *offset, /* OUT */
8525 vm_prot_t *out_prot, /* OUT */
8526 boolean_t *wired, /* OUT */
8527 vm_object_fault_info_t fault_info, /* OUT */
8528 vm_map_t *real_map)
8529 {
8530 vm_map_entry_t entry;
8531 register vm_map_t map = *var_map;
8532 vm_map_t old_map = *var_map;
8533 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
8534 vm_map_offset_t cow_parent_vaddr = 0;
8535 vm_map_offset_t old_start = 0;
8536 vm_map_offset_t old_end = 0;
8537 register vm_prot_t prot;
8538 boolean_t mask_protections;
8539 vm_prot_t original_fault_type;
8540
8541 /*
8542 * VM_PROT_MASK means that the caller wants us to use "fault_type"
8543 * as a mask against the mapping's actual protections, not as an
8544 * absolute value.
8545 */
8546 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
8547 fault_type &= ~VM_PROT_IS_MASK;
8548 original_fault_type = fault_type;
8549
8550 *real_map = map;
8551
8552 RetryLookup:
8553 fault_type = original_fault_type;
8554
8555 /*
8556 * If the map has an interesting hint, try it before calling
8557 * full blown lookup routine.
8558 */
8559 entry = map->hint;
8560
8561 if ((entry == vm_map_to_entry(map)) ||
8562 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
8563 vm_map_entry_t tmp_entry;
8564
8565 /*
8566 * Entry was either not a valid hint, or the vaddr
8567 * was not contained in the entry, so do a full lookup.
8568 */
8569 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
8570 if((cow_sub_map_parent) && (cow_sub_map_parent != map))
8571 vm_map_unlock(cow_sub_map_parent);
8572 if((*real_map != map)
8573 && (*real_map != cow_sub_map_parent))
8574 vm_map_unlock(*real_map);
8575 return KERN_INVALID_ADDRESS;
8576 }
8577
8578 entry = tmp_entry;
8579 }
8580 if(map == old_map) {
8581 old_start = entry->vme_start;
8582 old_end = entry->vme_end;
8583 }
8584
8585 /*
8586 * Handle submaps. Drop lock on upper map, submap is
8587 * returned locked.
8588 */
8589
8590 submap_recurse:
8591 if (entry->is_sub_map) {
8592 vm_map_offset_t local_vaddr;
8593 vm_map_offset_t end_delta;
8594 vm_map_offset_t start_delta;
8595 vm_map_entry_t submap_entry;
8596 boolean_t mapped_needs_copy=FALSE;
8597
8598 local_vaddr = vaddr;
8599
8600 if ((entry->use_pmap && !(fault_type & VM_PROT_WRITE))) {
8601 /* if real_map equals map we unlock below */
8602 if ((*real_map != map) &&
8603 (*real_map != cow_sub_map_parent))
8604 vm_map_unlock(*real_map);
8605 *real_map = entry->object.sub_map;
8606 }
8607
8608 if(entry->needs_copy && (fault_type & VM_PROT_WRITE)) {
8609 if (!mapped_needs_copy) {
8610 if (vm_map_lock_read_to_write(map)) {
8611 vm_map_lock_read(map);
8612 /* XXX FBDP: entry still valid ? */
8613 if(*real_map == entry->object.sub_map)
8614 *real_map = map;
8615 goto RetryLookup;
8616 }
8617 vm_map_lock_read(entry->object.sub_map);
8618 cow_sub_map_parent = map;
8619 /* reset base to map before cow object */
8620 /* this is the map which will accept */
8621 /* the new cow object */
8622 old_start = entry->vme_start;
8623 old_end = entry->vme_end;
8624 cow_parent_vaddr = vaddr;
8625 mapped_needs_copy = TRUE;
8626 } else {
8627 vm_map_lock_read(entry->object.sub_map);
8628 if((cow_sub_map_parent != map) &&
8629 (*real_map != map))
8630 vm_map_unlock(map);
8631 }
8632 } else {
8633 vm_map_lock_read(entry->object.sub_map);
8634 /* leave map locked if it is a target */
8635 /* cow sub_map above otherwise, just */
8636 /* follow the maps down to the object */
8637 /* here we unlock knowing we are not */
8638 /* revisiting the map. */
8639 if((*real_map != map) && (map != cow_sub_map_parent))
8640 vm_map_unlock_read(map);
8641 }
8642
8643 /* XXX FBDP: map has been unlocked, what protects "entry" !? */
8644 *var_map = map = entry->object.sub_map;
8645
8646 /* calculate the offset in the submap for vaddr */
8647 local_vaddr = (local_vaddr - entry->vme_start) + entry->offset;
8648
8649 RetrySubMap:
8650 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
8651 if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
8652 vm_map_unlock(cow_sub_map_parent);
8653 }
8654 if((*real_map != map)
8655 && (*real_map != cow_sub_map_parent)) {
8656 vm_map_unlock(*real_map);
8657 }
8658 *real_map = map;
8659 return KERN_INVALID_ADDRESS;
8660 }
8661
8662 /* find the attenuated shadow of the underlying object */
8663 /* on our target map */
8664
8665 /* in plain English: the submap object may extend beyond the */
8666 /* region mapped by the entry, or may only fill a portion */
8667 /* of it. For our purposes, we only care if the object */
8668 /* doesn't fill. In this case the area which will */
8669 /* ultimately be clipped in the top map will only need */
8670 /* to be as big as the portion of the underlying entry */
8671 /* which is mapped */
8672 start_delta = submap_entry->vme_start > entry->offset ?
8673 submap_entry->vme_start - entry->offset : 0;
8674
8675 end_delta =
8676 (entry->offset + start_delta + (old_end - old_start)) <=
8677 submap_entry->vme_end ?
8678 0 : (entry->offset +
8679 (old_end - old_start))
8680 - submap_entry->vme_end;
8681
8682 old_start += start_delta;
8683 old_end -= end_delta;
8684
8685 if(submap_entry->is_sub_map) {
8686 entry = submap_entry;
8687 vaddr = local_vaddr;
8688 goto submap_recurse;
8689 }
8690
8691 if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) {
8692
8693 vm_object_t sub_object, copy_object;
8694 vm_object_offset_t copy_offset;
8695 vm_map_offset_t local_start;
8696 vm_map_offset_t local_end;
8697 boolean_t copied_slowly = FALSE;
8698
8699 if (vm_map_lock_read_to_write(map)) {
8700 vm_map_lock_read(map);
8701 old_start -= start_delta;
8702 old_end += end_delta;
8703 goto RetrySubMap;
8704 }
8705
8706
8707 sub_object = submap_entry->object.vm_object;
8708 if (sub_object == VM_OBJECT_NULL) {
8709 sub_object =
8710 vm_object_allocate(
8711 (vm_map_size_t)
8712 (submap_entry->vme_end -
8713 submap_entry->vme_start));
8714 submap_entry->object.vm_object = sub_object;
8715 submap_entry->offset = 0;
8716 }
8717 local_start = local_vaddr -
8718 (cow_parent_vaddr - old_start);
8719 local_end = local_vaddr +
8720 (old_end - cow_parent_vaddr);
8721 vm_map_clip_start(map, submap_entry, local_start);
8722 vm_map_clip_end(map, submap_entry, local_end);
8723 /* unnesting was done in vm_map_clip_start/end() */
8724 assert(!submap_entry->use_pmap);
8725
8726 /* This is the COW case, let's connect */
8727 /* an entry in our space to the underlying */
8728 /* object in the submap, bypassing the */
8729 /* submap. */
8730
8731
8732 if(submap_entry->wired_count != 0 ||
8733 (sub_object->copy_strategy ==
8734 MEMORY_OBJECT_COPY_NONE)) {
8735 vm_object_lock(sub_object);
8736 vm_object_copy_slowly(sub_object,
8737 submap_entry->offset,
8738 (submap_entry->vme_end -
8739 submap_entry->vme_start),
8740 FALSE,
8741 &copy_object);
8742 copied_slowly = TRUE;
8743 } else {
8744
8745 /* set up shadow object */
8746 copy_object = sub_object;
8747 vm_object_reference(copy_object);
8748 sub_object->shadowed = TRUE;
8749 submap_entry->needs_copy = TRUE;
8750
8751 prot = submap_entry->protection & ~VM_PROT_WRITE;
8752
8753 if (override_nx(map, submap_entry->alias) && prot)
8754 prot |= VM_PROT_EXECUTE;
8755
8756 vm_object_pmap_protect(
8757 sub_object,
8758 submap_entry->offset,
8759 submap_entry->vme_end -
8760 submap_entry->vme_start,
8761 (submap_entry->is_shared
8762 || map->mapped) ?
8763 PMAP_NULL : map->pmap,
8764 submap_entry->vme_start,
8765 prot);
8766 }
8767
8768 /*
8769 * Adjust the fault offset to the submap entry.
8770 */
8771 copy_offset = (local_vaddr -
8772 submap_entry->vme_start +
8773 submap_entry->offset);
8774
8775 /* This works differently from the */
8776 /* normal submap case. We go back */
8777 /* to the parent of the cow map and */
8778 /* clip out the target portion of */
8779 /* the sub_map, substituting the */
8780 /* new copy object. */
8781
8782 vm_map_unlock(map);
8783 local_start = old_start;
8784 local_end = old_end;
8785 map = cow_sub_map_parent;
8786 *var_map = cow_sub_map_parent;
8787 vaddr = cow_parent_vaddr;
8788 cow_sub_map_parent = NULL;
8789
8790 if(!vm_map_lookup_entry(map,
8791 vaddr, &entry)) {
8792 vm_object_deallocate(
8793 copy_object);
8794 vm_map_lock_write_to_read(map);
8795 return KERN_INVALID_ADDRESS;
8796 }
8797
8798 /* clip out the portion of space */
8799 /* mapped by the sub map which */
8800 /* corresponds to the underlying */
8801 /* object */
8802
8803 /*
8804 * Clip (and unnest) the smallest nested chunk
8805 * possible around the faulting address...
8806 */
8807 local_start = vaddr & ~(pmap_nesting_size_min - 1);
8808 local_end = local_start + pmap_nesting_size_min;
8809 /*
8810 * ... but don't go beyond the "old_start" to "old_end"
8811 * range, to avoid spanning over another VM region
8812 * with a possibly different VM object and/or offset.
8813 */
8814 if (local_start < old_start) {
8815 local_start = old_start;
8816 }
8817 if (local_end > old_end) {
8818 local_end = old_end;
8819 }
8820 /*
8821 * Adjust copy_offset to the start of the range.
8822 */
8823 copy_offset -= (vaddr - local_start);
8824
8825 vm_map_clip_start(map, entry, local_start);
8826 vm_map_clip_end(map, entry, local_end);
8827 /* unnesting was done in vm_map_clip_start/end() */
8828 assert(!entry->use_pmap);
8829
8830 /* substitute copy object for */
8831 /* shared map entry */
8832 vm_map_deallocate(entry->object.sub_map);
8833 entry->is_sub_map = FALSE;
8834 entry->object.vm_object = copy_object;
8835
8836 /* propagate the submap entry's protections */
8837 entry->protection |= submap_entry->protection;
8838 entry->max_protection |= submap_entry->max_protection;
8839
8840 if(copied_slowly) {
8841 entry->offset = local_start - old_start;
8842 entry->needs_copy = FALSE;
8843 entry->is_shared = FALSE;
8844 } else {
8845 entry->offset = copy_offset;
8846 entry->needs_copy = TRUE;
8847 if(entry->inheritance == VM_INHERIT_SHARE)
8848 entry->inheritance = VM_INHERIT_COPY;
8849 if (map != old_map)
8850 entry->is_shared = TRUE;
8851 }
8852 if(entry->inheritance == VM_INHERIT_SHARE)
8853 entry->inheritance = VM_INHERIT_COPY;
8854
8855 vm_map_lock_write_to_read(map);
8856 } else {
8857 if((cow_sub_map_parent)
8858 && (cow_sub_map_parent != *real_map)
8859 && (cow_sub_map_parent != map)) {
8860 vm_map_unlock(cow_sub_map_parent);
8861 }
8862 entry = submap_entry;
8863 vaddr = local_vaddr;
8864 }
8865 }
8866
8867 /*
8868 * Check whether this task is allowed to have
8869 * this page.
8870 */
8871
8872 prot = entry->protection;
8873
8874 if (override_nx(map, entry->alias) && prot) {
8875 /*
8876 * HACK -- if not a stack, then allow execution
8877 */
8878 prot |= VM_PROT_EXECUTE;
8879 }
8880
8881 if (mask_protections) {
8882 fault_type &= prot;
8883 if (fault_type == VM_PROT_NONE) {
8884 goto protection_failure;
8885 }
8886 }
8887 if ((fault_type & (prot)) != fault_type) {
8888 protection_failure:
8889 if (*real_map != map) {
8890 vm_map_unlock(*real_map);
8891 }
8892 *real_map = map;
8893
8894 if ((fault_type & VM_PROT_EXECUTE) && prot)
8895 log_stack_execution_failure((addr64_t)vaddr, prot);
8896
8897 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
8898 return KERN_PROTECTION_FAILURE;
8899 }
8900
8901 /*
8902 * If this page is not pageable, we have to get
8903 * it for all possible accesses.
8904 */
8905
8906 *wired = (entry->wired_count != 0);
8907 if (*wired)
8908 fault_type = prot;
8909
8910 /*
8911 * If the entry was copy-on-write, we either ...
8912 */
8913
8914 if (entry->needs_copy) {
8915 /*
8916 * If we want to write the page, we may as well
8917 * handle that now since we've got the map locked.
8918 *
8919 * If we don't need to write the page, we just
8920 * demote the permissions allowed.
8921 */
8922
8923 if ((fault_type & VM_PROT_WRITE) || *wired) {
8924 /*
8925 * Make a new object, and place it in the
8926 * object chain. Note that no new references
8927 * have appeared -- one just moved from the
8928 * map to the new object.
8929 */
8930
8931 if (vm_map_lock_read_to_write(map)) {
8932 vm_map_lock_read(map);
8933 goto RetryLookup;
8934 }
8935 vm_object_shadow(&entry->object.vm_object,
8936 &entry->offset,
8937 (vm_map_size_t) (entry->vme_end -
8938 entry->vme_start));
8939
8940 entry->object.vm_object->shadowed = TRUE;
8941 entry->needs_copy = FALSE;
8942 vm_map_lock_write_to_read(map);
8943 }
8944 else {
8945 /*
8946 * We're attempting to read a copy-on-write
8947 * page -- don't allow writes.
8948 */
8949
8950 prot &= (~VM_PROT_WRITE);
8951 }
8952 }
8953
8954 /*
8955 * Create an object if necessary.
8956 */
8957 if (entry->object.vm_object == VM_OBJECT_NULL) {
8958
8959 if (vm_map_lock_read_to_write(map)) {
8960 vm_map_lock_read(map);
8961 goto RetryLookup;
8962 }
8963
8964 entry->object.vm_object = vm_object_allocate(
8965 (vm_map_size_t)(entry->vme_end - entry->vme_start));
8966 entry->offset = 0;
8967 vm_map_lock_write_to_read(map);
8968 }
8969
8970 /*
8971 * Return the object/offset from this entry. If the entry
8972 * was copy-on-write or empty, it has been fixed up. Also
8973 * return the protection.
8974 */
8975
8976 *offset = (vaddr - entry->vme_start) + entry->offset;
8977 *object = entry->object.vm_object;
8978 *out_prot = prot;
8979
8980 if (fault_info) {
8981 fault_info->interruptible = THREAD_UNINT; /* for now... */
8982 /* ... the caller will change "interruptible" if needed */
8983 fault_info->cluster_size = 0;
8984 fault_info->user_tag = entry->alias;
8985 fault_info->behavior = entry->behavior;
8986 fault_info->lo_offset = entry->offset;
8987 fault_info->hi_offset = (entry->vme_end - entry->vme_start) + entry->offset;
8988 fault_info->no_cache = entry->no_cache;
8989 fault_info->stealth = FALSE;
8990 fault_info->io_sync = FALSE;
8991 fault_info->cs_bypass = (entry->used_for_jit)? TRUE : FALSE;
8992 fault_info->mark_zf_absent = FALSE;
8993 }
8994
8995 /*
8996 * Lock the object to prevent it from disappearing
8997 */
8998 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
8999 vm_object_lock(*object);
9000 else
9001 vm_object_lock_shared(*object);
9002
9003 /*
9004 * Save the version number
9005 */
9006
9007 out_version->main_timestamp = map->timestamp;
9008
9009 return KERN_SUCCESS;
9010 }
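
/*
 * Hedged usage sketch (hypothetical "example_" wrapper, not compiled):
 * the canonical consumer of vm_map_lookup_locked() is the page-fault
 * path, which looks up the object/offset under a read lock, records
 * the map version, and later re-validates with vm_map_verify().  The
 * fault handling itself is omitted here.
 */
#if 0	/* example only */
static kern_return_t
example_lookup(
	vm_map_t	map,
	vm_map_offset_t	vaddr,
	vm_prot_t	fault_type)
{
	vm_map_version_t	version;
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_prot_t		prot;
	boolean_t		wired;
	vm_map_t		real_map;
	kern_return_t		kr;

	vm_map_lock_read(map);
	kr = vm_map_lookup_locked(&map, vaddr, fault_type,
				  OBJECT_LOCK_EXCLUSIVE, &version,
				  &object, &offset, &prot, &wired,
				  NULL, &real_map);
	if (kr != KERN_SUCCESS) {
		vm_map_unlock_read(map);
		return kr;
	}
	/* object is returned locked; "map" and "real_map" are still locked */
	vm_object_unlock(object);
	if (real_map != map)
		vm_map_unlock(real_map);
	vm_map_unlock_read(map);
	/* ... fault the page in, then re-check with vm_map_verify(map, &version) ... */
	return KERN_SUCCESS;
}
#endif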
9011
9012
9013 /*
9014 * vm_map_verify:
9015 *
9016 * Verifies that the map in question has not changed
9017 * since the given version. If successful, the map
9018 * will not change until vm_map_verify_done() is called.
9019 */
9020 boolean_t
9021 vm_map_verify(
9022 register vm_map_t map,
9023 register vm_map_version_t *version) /* REF */
9024 {
9025 boolean_t result;
9026
9027 vm_map_lock_read(map);
9028 result = (map->timestamp == version->main_timestamp);
9029
9030 if (!result)
9031 vm_map_unlock_read(map);
9032
9033 return(result);
9034 }
9035
9036 /*
9037 * vm_map_verify_done:
9038 *
9039 * Releases locks acquired by a vm_map_verify.
9040 *
9041 * This is now a macro in vm/vm_map.h. It does a
9042 * vm_map_unlock_read on the map.
9043 */
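
/*
 * Hedged usage fragment (assumes "map" and "version" saved by an
 * earlier vm_map_lookup_locked() call, not compiled): a successful
 * vm_map_verify() leaves the map read-locked until
 * vm_map_verify_done() releases it.
 */
#if 0	/* example only */
	if (vm_map_verify(map, &version)) {
		/* map unchanged: the cached lookup results are still good */
		/* ... use them ... */
		vm_map_verify_done(map, &version);	/* vm_map_unlock_read */
	} else {
		/* map changed while unlocked: redo the lookup */
	}
#endif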
9044
9045
9046 /*
9047 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
9048 * Goes away after regular vm_region_recurse function migrates to
9049 * 64 bits
9050 * vm_region_recurse: A form of vm_region which follows the
9051 * submaps in a target map
9052 *
9053 */
9054
9055 kern_return_t
9056 vm_map_region_recurse_64(
9057 vm_map_t map,
9058 vm_map_offset_t *address, /* IN/OUT */
9059 vm_map_size_t *size, /* OUT */
9060 natural_t *nesting_depth, /* IN/OUT */
9061 vm_region_submap_info_64_t submap_info, /* IN/OUT */
9062 mach_msg_type_number_t *count) /* IN/OUT */
9063 {
9064 vm_region_extended_info_data_t extended;
9065 vm_map_entry_t tmp_entry;
9066 vm_map_offset_t user_address;
9067 unsigned int user_max_depth;
9068
9069 /*
9070 * "curr_entry" is the VM map entry preceding or including the
9071 * address we're looking for.
9072 * "curr_map" is the map or sub-map containing "curr_entry".
9073 * "curr_address" is the equivalent of the top map's "user_address"
9074 * in the current map.
9075 * "curr_offset" is the cumulated offset of "curr_map" in the
9076 * target task's address space.
9077 * "curr_depth" is the depth of "curr_map" in the chain of
9078 * sub-maps.
9079 *
9080 * "curr_max_below" and "curr_max_above" limit the range (around
9081 * "curr_address") we should take into account in the current (sub)map.
9082 * They limit the range to what's visible through the map entries
9083 * we've traversed from the top map to the current map.
9084 *
9085 */
9086 vm_map_entry_t curr_entry;
9087 vm_map_address_t curr_address;
9088 vm_map_offset_t curr_offset;
9089 vm_map_t curr_map;
9090 unsigned int curr_depth;
9091 vm_map_offset_t curr_max_below, curr_max_above;
9092 vm_map_offset_t curr_skip;
9093
9094 /*
9095 * "next_" is the same as "curr_" but for the VM region immediately
9096 * after the address we're looking for. We need to keep track of this
9097 * too because we want to return info about that region if the
9098 * address we're looking for is not mapped.
9099 */
9100 vm_map_entry_t next_entry;
9101 vm_map_offset_t next_offset;
9102 vm_map_offset_t next_address;
9103 vm_map_t next_map;
9104 unsigned int next_depth;
9105 vm_map_offset_t next_max_below, next_max_above;
9106 vm_map_offset_t next_skip;
9107
9108 boolean_t look_for_pages;
9109 vm_region_submap_short_info_64_t short_info;
9110
9111 if (map == VM_MAP_NULL) {
9112 /* no address space to work on */
9113 return KERN_INVALID_ARGUMENT;
9114 }
9115
9116 if (*count < VM_REGION_SUBMAP_INFO_COUNT_64) {
9117 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
9118 /*
9119 * "info" structure is not big enough and
9120 * would overflow
9121 */
9122 return KERN_INVALID_ARGUMENT;
9123 } else {
9124 look_for_pages = FALSE;
9125 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
9126 short_info = (vm_region_submap_short_info_64_t) submap_info;
9127 submap_info = NULL;
9128 }
9129 } else {
9130 look_for_pages = TRUE;
9131 *count = VM_REGION_SUBMAP_INFO_COUNT_64;
9132 short_info = NULL;
9133 }
9134
9135
9136 user_address = *address;
9137 user_max_depth = *nesting_depth;
9138
9139 curr_entry = NULL;
9140 curr_map = map;
9141 curr_address = user_address;
9142 curr_offset = 0;
9143 curr_skip = 0;
9144 curr_depth = 0;
9145 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
9146 curr_max_below = curr_address;
9147
9148 next_entry = NULL;
9149 next_map = NULL;
9150 next_address = 0;
9151 next_offset = 0;
9152 next_skip = 0;
9153 next_depth = 0;
9154 next_max_above = (vm_map_offset_t) -1;
9155 next_max_below = (vm_map_offset_t) -1;
9156
9157 if (not_in_kdp) {
9158 vm_map_lock_read(curr_map);
9159 }
9160
9161 for (;;) {
9162 if (vm_map_lookup_entry(curr_map,
9163 curr_address,
9164 &tmp_entry)) {
9165 /* tmp_entry contains the address we're looking for */
9166 curr_entry = tmp_entry;
9167 } else {
9168 vm_map_offset_t skip;
9169 /*
9170 * The address is not mapped. "tmp_entry" is the
9171 * map entry preceding the address. We want the next
9172 * one, if it exists.
9173 */
9174 curr_entry = tmp_entry->vme_next;
9175
9176 if (curr_entry == vm_map_to_entry(curr_map) ||
9177 (curr_entry->vme_start >=
9178 curr_address + curr_max_above)) {
9179 /* no next entry at this level: stop looking */
9180 if (not_in_kdp) {
9181 vm_map_unlock_read(curr_map);
9182 }
9183 curr_entry = NULL;
9184 curr_map = NULL;
9185 curr_offset = 0;
9186 curr_depth = 0;
9187 curr_max_above = 0;
9188 curr_max_below = 0;
9189 break;
9190 }
9191
9192 /* adjust current address and offset */
9193 skip = curr_entry->vme_start - curr_address;
9194 curr_address = curr_entry->vme_start;
9195 curr_skip = skip;
9196 curr_offset += skip;
9197 curr_max_above -= skip;
9198 curr_max_below = 0;
9199 }
9200
9201 /*
9202 * Is the next entry at this level closer to the address (or
9203 * deeper in the submap chain) than the one we had
9204 * so far ?
9205 */
9206 tmp_entry = curr_entry->vme_next;
9207 if (tmp_entry == vm_map_to_entry(curr_map)) {
9208 /* no next entry at this level */
9209 } else if (tmp_entry->vme_start >=
9210 curr_address + curr_max_above) {
9211 /*
9212 * tmp_entry is beyond the scope of what we mapped of
9213 * this submap in the upper level: ignore it.
9214 */
9215 } else if ((next_entry == NULL) ||
9216 (tmp_entry->vme_start + curr_offset <=
9217 next_entry->vme_start + next_offset)) {
9218 /*
9219 * We didn't have a "next_entry" or this one is
9220 * closer to the address we're looking for:
9221 * use this "tmp_entry" as the new "next_entry".
9222 */
9223 if (next_entry != NULL) {
9224 /* unlock the last "next_map" */
9225 if (next_map != curr_map && not_in_kdp) {
9226 vm_map_unlock_read(next_map);
9227 }
9228 }
9229 next_entry = tmp_entry;
9230 next_map = curr_map;
9231 next_depth = curr_depth;
9232 next_address = next_entry->vme_start;
9233 next_skip = curr_skip;
9234 next_offset = curr_offset;
9235 next_offset += (next_address - curr_address);
9236 next_max_above = MIN(next_max_above, curr_max_above);
9237 next_max_above = MIN(next_max_above,
9238 next_entry->vme_end - next_address);
9239 next_max_below = MIN(next_max_below, curr_max_below);
9240 next_max_below = MIN(next_max_below,
9241 next_address - next_entry->vme_start);
9242 }
9243
9244 /*
9245 * "curr_max_{above,below}" allow us to keep track of the
9246 * portion of the submap that is actually mapped at this level:
9247 * the rest of that submap is irrelevant to us, since it's not
9248 * mapped here.
9249 * The relevant portion of the map starts at
9250 * "curr_entry->offset" up to the size of "curr_entry".
9251 */
9252 curr_max_above = MIN(curr_max_above,
9253 curr_entry->vme_end - curr_address);
9254 curr_max_below = MIN(curr_max_below,
9255 curr_address - curr_entry->vme_start);
9256
9257 if (!curr_entry->is_sub_map ||
9258 curr_depth >= user_max_depth) {
9259 /*
9260 * We hit a leaf map or we reached the maximum depth
9261 * we could, so stop looking. Keep the current map
9262 * locked.
9263 */
9264 break;
9265 }
9266
9267 /*
9268 * Get down to the next submap level.
9269 */
9270
9271 /*
9272 * Lock the next level and unlock the current level,
9273 * unless we need to keep it locked to access the "next_entry"
9274 * later.
9275 */
9276 if (not_in_kdp) {
9277 vm_map_lock_read(curr_entry->object.sub_map);
9278 }
9279 if (curr_map == next_map) {
9280 /* keep "next_map" locked in case we need it */
9281 } else {
9282 /* release this map */
9283 if (not_in_kdp)
9284 vm_map_unlock_read(curr_map);
9285 }
9286
9287 /*
9288 * Adjust the offset. "curr_entry" maps the submap
9289 * at relative address "curr_entry->vme_start" in the
9290 * curr_map but skips the first "curr_entry->offset"
9291 * bytes of the submap.
9292 * "curr_offset" always represents the offset of a virtual
9293 * address in the curr_map relative to the absolute address
9294 * space (i.e. the top-level VM map).
9295 */
9296 curr_offset +=
9297 (curr_entry->offset - curr_entry->vme_start);
9298 curr_address = user_address + curr_offset;
9299 /* switch to the submap */
9300 curr_map = curr_entry->object.sub_map;
9301 curr_depth++;
9302 curr_entry = NULL;
9303 }
9304
9305 if (curr_entry == NULL) {
9306 /* no VM region contains the address... */
9307 if (next_entry == NULL) {
9308 /* ... and no VM region follows it either */
9309 return KERN_INVALID_ADDRESS;
9310 }
9311 /* ... gather info about the next VM region */
9312 curr_entry = next_entry;
9313 curr_map = next_map; /* still locked ... */
9314 curr_address = next_address;
9315 curr_skip = next_skip;
9316 curr_offset = next_offset;
9317 curr_depth = next_depth;
9318 curr_max_above = next_max_above;
9319 curr_max_below = next_max_below;
9320 if (curr_map == map) {
9321 user_address = curr_address;
9322 }
9323 } else {
9324 /* we won't need "next_entry" after all */
9325 if (next_entry != NULL) {
9326 /* release "next_map" */
9327 if (next_map != curr_map && not_in_kdp) {
9328 vm_map_unlock_read(next_map);
9329 }
9330 }
9331 }
9332 next_entry = NULL;
9333 next_map = NULL;
9334 next_offset = 0;
9335 next_skip = 0;
9336 next_depth = 0;
9337 next_max_below = -1;
9338 next_max_above = -1;
9339
9340 *nesting_depth = curr_depth;
9341 *size = curr_max_above + curr_max_below;
9342 *address = user_address + curr_skip - curr_max_below;
9343
9344 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
9345 // so probably should be a real 32b ID vs. ptr.
9346 // Current users just check for equality
9347 #define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)p)
9348
9349 if (look_for_pages) {
9350 submap_info->user_tag = curr_entry->alias;
9351 submap_info->offset = curr_entry->offset;
9352 submap_info->protection = curr_entry->protection;
9353 submap_info->inheritance = curr_entry->inheritance;
9354 submap_info->max_protection = curr_entry->max_protection;
9355 submap_info->behavior = curr_entry->behavior;
9356 submap_info->user_wired_count = curr_entry->user_wired_count;
9357 submap_info->is_submap = curr_entry->is_sub_map;
9358 submap_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
9359 } else {
9360 short_info->user_tag = curr_entry->alias;
9361 short_info->offset = curr_entry->offset;
9362 short_info->protection = curr_entry->protection;
9363 short_info->inheritance = curr_entry->inheritance;
9364 short_info->max_protection = curr_entry->max_protection;
9365 short_info->behavior = curr_entry->behavior;
9366 short_info->user_wired_count = curr_entry->user_wired_count;
9367 short_info->is_submap = curr_entry->is_sub_map;
9368 short_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
9369 }
9370
9371 extended.pages_resident = 0;
9372 extended.pages_swapped_out = 0;
9373 extended.pages_shared_now_private = 0;
9374 extended.pages_dirtied = 0;
9375 extended.external_pager = 0;
9376 extended.shadow_depth = 0;
9377
9378 if (not_in_kdp) {
9379 if (!curr_entry->is_sub_map) {
9380 vm_map_offset_t range_start, range_end;
9381 range_start = MAX((curr_address - curr_max_below),
9382 curr_entry->vme_start);
9383 range_end = MIN((curr_address + curr_max_above),
9384 curr_entry->vme_end);
9385 vm_map_region_walk(curr_map,
9386 range_start,
9387 curr_entry,
9388 (curr_entry->offset +
9389 (range_start -
9390 curr_entry->vme_start)),
9391 range_end - range_start,
9392 &extended,
9393 look_for_pages);
9394 if (extended.external_pager &&
9395 extended.ref_count == 2 &&
9396 extended.share_mode == SM_SHARED) {
9397 extended.share_mode = SM_PRIVATE;
9398 }
9399 } else {
9400 if (curr_entry->use_pmap) {
9401 extended.share_mode = SM_TRUESHARED;
9402 } else {
9403 extended.share_mode = SM_PRIVATE;
9404 }
9405 extended.ref_count =
9406 curr_entry->object.sub_map->ref_count;
9407 }
9408 }
9409
9410 if (look_for_pages) {
9411 submap_info->pages_resident = extended.pages_resident;
9412 submap_info->pages_swapped_out = extended.pages_swapped_out;
9413 submap_info->pages_shared_now_private =
9414 extended.pages_shared_now_private;
9415 submap_info->pages_dirtied = extended.pages_dirtied;
9416 submap_info->external_pager = extended.external_pager;
9417 submap_info->shadow_depth = extended.shadow_depth;
9418 submap_info->share_mode = extended.share_mode;
9419 submap_info->ref_count = extended.ref_count;
9420 } else {
9421 short_info->external_pager = extended.external_pager;
9422 short_info->shadow_depth = extended.shadow_depth;
9423 short_info->share_mode = extended.share_mode;
9424 short_info->ref_count = extended.ref_count;
9425 }
9426
9427 if (not_in_kdp) {
9428 vm_map_unlock_read(curr_map);
9429 }
9430
9431 return KERN_SUCCESS;
9432 }
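
/*
 * Hedged usage sketch (hypothetical "example_" wrapper, not compiled):
 * walking an address space region by region, descending into nested
 * submaps.  The depth passed in is the maximum submap depth the caller
 * is willing to follow; the value returned is the depth reached.
 */
#if 0	/* example only */
static void
example_dump_regions(
	vm_map_t	map)
{
	vm_map_offset_t			address = 0;
	vm_map_size_t			size;
	natural_t			depth;
	vm_region_submap_info_data_64_t	info;
	mach_msg_type_number_t		count;

	for (;;) {
		depth = 99;	/* arbitrary generous submap depth */
		count = VM_REGION_SUBMAP_INFO_COUNT_64;
		if (vm_map_region_recurse_64(map, &address, &size, &depth,
					     &info, &count) != KERN_SUCCESS)
			break;
		/* ... report [address, address + size), info.protection, ... */
		address += size;
	}
}
#endif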
9433
9434 /*
9435 * vm_region:
9436 *
9437 * User call to obtain information about a region in
9438 * a task's address map. Several flavors are supported:
9439 * basic info (32- and 64-bit), extended info and top info.
9440 *
9441 * XXX The reserved and behavior fields cannot be filled
9442 * in until the vm merge from the IK is completed, and
9443 * vm_reserve is implemented.
9444 */
9445
9446 kern_return_t
9447 vm_map_region(
9448 vm_map_t map,
9449 vm_map_offset_t *address, /* IN/OUT */
9450 vm_map_size_t *size, /* OUT */
9451 vm_region_flavor_t flavor, /* IN */
9452 vm_region_info_t info, /* OUT */
9453 mach_msg_type_number_t *count, /* IN/OUT */
9454 mach_port_t *object_name) /* OUT */
9455 {
9456 vm_map_entry_t tmp_entry;
9457 vm_map_entry_t entry;
9458 vm_map_offset_t start;
9459
9460 if (map == VM_MAP_NULL)
9461 return(KERN_INVALID_ARGUMENT);
9462
9463 switch (flavor) {
9464
9465 case VM_REGION_BASIC_INFO:
9466 /* legacy for old 32-bit objects info */
9467 {
9468 vm_region_basic_info_t basic;
9469
9470 if (*count < VM_REGION_BASIC_INFO_COUNT)
9471 return(KERN_INVALID_ARGUMENT);
9472
9473 basic = (vm_region_basic_info_t) info;
9474 *count = VM_REGION_BASIC_INFO_COUNT;
9475
9476 vm_map_lock_read(map);
9477
9478 start = *address;
9479 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9480 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9481 vm_map_unlock_read(map);
9482 return(KERN_INVALID_ADDRESS);
9483 }
9484 } else {
9485 entry = tmp_entry;
9486 }
9487
9488 start = entry->vme_start;
9489
9490 basic->offset = (uint32_t)entry->offset;
9491 basic->protection = entry->protection;
9492 basic->inheritance = entry->inheritance;
9493 basic->max_protection = entry->max_protection;
9494 basic->behavior = entry->behavior;
9495 basic->user_wired_count = entry->user_wired_count;
9496 basic->reserved = entry->is_sub_map;
9497 *address = start;
9498 *size = (entry->vme_end - start);
9499
9500 if (object_name) *object_name = IP_NULL;
9501 if (entry->is_sub_map) {
9502 basic->shared = FALSE;
9503 } else {
9504 basic->shared = entry->is_shared;
9505 }
9506
9507 vm_map_unlock_read(map);
9508 return(KERN_SUCCESS);
9509 }
9510
9511 case VM_REGION_BASIC_INFO_64:
9512 {
9513 vm_region_basic_info_64_t basic;
9514
9515 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
9516 return(KERN_INVALID_ARGUMENT);
9517
9518 basic = (vm_region_basic_info_64_t) info;
9519 *count = VM_REGION_BASIC_INFO_COUNT_64;
9520
9521 vm_map_lock_read(map);
9522
9523 start = *address;
9524 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9525 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9526 vm_map_unlock_read(map);
9527 return(KERN_INVALID_ADDRESS);
9528 }
9529 } else {
9530 entry = tmp_entry;
9531 }
9532
9533 start = entry->vme_start;
9534
9535 basic->offset = entry->offset;
9536 basic->protection = entry->protection;
9537 basic->inheritance = entry->inheritance;
9538 basic->max_protection = entry->max_protection;
9539 basic->behavior = entry->behavior;
9540 basic->user_wired_count = entry->user_wired_count;
9541 basic->reserved = entry->is_sub_map;
9542 *address = start;
9543 *size = (entry->vme_end - start);
9544
9545 if (object_name) *object_name = IP_NULL;
9546 if (entry->is_sub_map) {
9547 basic->shared = FALSE;
9548 } else {
9549 basic->shared = entry->is_shared;
9550 }
9551
9552 vm_map_unlock_read(map);
9553 return(KERN_SUCCESS);
9554 }
9555 case VM_REGION_EXTENDED_INFO:
9556 {
9557 vm_region_extended_info_t extended;
9558
9559 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
9560 return(KERN_INVALID_ARGUMENT);
9561
9562 extended = (vm_region_extended_info_t) info;
9563 *count = VM_REGION_EXTENDED_INFO_COUNT;
9564
9565 vm_map_lock_read(map);
9566
9567 start = *address;
9568 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9569 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9570 vm_map_unlock_read(map);
9571 return(KERN_INVALID_ADDRESS);
9572 }
9573 } else {
9574 entry = tmp_entry;
9575 }
9576 start = entry->vme_start;
9577
9578 extended->protection = entry->protection;
9579 extended->user_tag = entry->alias;
9580 extended->pages_resident = 0;
9581 extended->pages_swapped_out = 0;
9582 extended->pages_shared_now_private = 0;
9583 extended->pages_dirtied = 0;
9584 extended->external_pager = 0;
9585 extended->shadow_depth = 0;
9586
9587 vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended, TRUE);
9588
9589 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
9590 extended->share_mode = SM_PRIVATE;
9591
9592 if (object_name)
9593 *object_name = IP_NULL;
9594 *address = start;
9595 *size = (entry->vme_end - start);
9596
9597 vm_map_unlock_read(map);
9598 return(KERN_SUCCESS);
9599 }
9600 case VM_REGION_TOP_INFO:
9601 {
9602 vm_region_top_info_t top;
9603
9604 if (*count < VM_REGION_TOP_INFO_COUNT)
9605 return(KERN_INVALID_ARGUMENT);
9606
9607 top = (vm_region_top_info_t) info;
9608 *count = VM_REGION_TOP_INFO_COUNT;
9609
9610 vm_map_lock_read(map);
9611
9612 start = *address;
9613 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9614 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9615 vm_map_unlock_read(map);
9616 return(KERN_INVALID_ADDRESS);
9617 }
9618 } else {
9619 entry = tmp_entry;
9620
9621 }
9622 start = entry->vme_start;
9623
9624 top->private_pages_resident = 0;
9625 top->shared_pages_resident = 0;
9626
9627 vm_map_region_top_walk(entry, top);
9628
9629 if (object_name)
9630 *object_name = IP_NULL;
9631 *address = start;
9632 *size = (entry->vme_end - start);
9633
9634 vm_map_unlock_read(map);
9635 return(KERN_SUCCESS);
9636 }
9637 default:
9638 return(KERN_INVALID_ARGUMENT);
9639 }
9640 }
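
/*
 * Hedged usage sketch (hypothetical "example_" wrapper and arguments,
 * not compiled): querying the 64-bit basic info for the region
 * containing, or immediately following, an address.
 */
#if 0	/* example only */
static kern_return_t
example_region_basic_info(
	vm_map_t	map,
	vm_map_offset_t	some_address)
{
	vm_map_offset_t			address = some_address;
	vm_map_size_t			size;
	vm_region_basic_info_data_64_t	basic;
	mach_msg_type_number_t		count = VM_REGION_BASIC_INFO_COUNT_64;
	mach_port_t			object_name;

	/* on success, [address, address + size) and "basic" describe the region */
	return vm_map_region(map, &address, &size, VM_REGION_BASIC_INFO_64,
			     (vm_region_info_t) &basic, &count, &object_name);
}
#endif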
9641
9642 #define OBJ_RESIDENT_COUNT(obj, entry_size) \
9643 MIN((entry_size), \
9644 ((obj)->all_reusable ? \
9645 (obj)->wired_page_count : \
9646 (obj)->resident_page_count - (obj)->reusable_page_count))
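
/*
 * Worked example for the macro above: an object with 100 resident pages
 * of which 30 are reusable, seen through an entry spanning 50 pages,
 * reports MIN(50, 100 - 30) = 50 resident pages; through an 80-page
 * entry it reports MIN(80, 70) = 70.  For an all_reusable object only
 * the wired pages are counted.
 */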
9647
9648 void
9649 vm_map_region_top_walk(
9650 vm_map_entry_t entry,
9651 vm_region_top_info_t top)
9652 {
9653
9654 if (entry->object.vm_object == 0 || entry->is_sub_map) {
9655 top->share_mode = SM_EMPTY;
9656 top->ref_count = 0;
9657 top->obj_id = 0;
9658 return;
9659 }
9660
9661 {
9662 struct vm_object *obj, *tmp_obj;
9663 int ref_count;
9664 uint32_t entry_size;
9665
9666 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
9667
9668 obj = entry->object.vm_object;
9669
9670 vm_object_lock(obj);
9671
9672 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9673 ref_count--;
9674
9675 assert(obj->reusable_page_count <= obj->resident_page_count);
9676 if (obj->shadow) {
9677 if (ref_count == 1)
9678 top->private_pages_resident =
9679 OBJ_RESIDENT_COUNT(obj, entry_size);
9680 else
9681 top->shared_pages_resident =
9682 OBJ_RESIDENT_COUNT(obj, entry_size);
9683 top->ref_count = ref_count;
9684 top->share_mode = SM_COW;
9685
9686 while ((tmp_obj = obj->shadow)) {
9687 vm_object_lock(tmp_obj);
9688 vm_object_unlock(obj);
9689 obj = tmp_obj;
9690
9691 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9692 ref_count--;
9693
9694 assert(obj->reusable_page_count <= obj->resident_page_count);
9695 top->shared_pages_resident +=
9696 OBJ_RESIDENT_COUNT(obj, entry_size);
9697 top->ref_count += ref_count - 1;
9698 }
9699 } else {
9700 if (entry->superpage_size) {
9701 top->share_mode = SM_LARGE_PAGE;
9702 top->shared_pages_resident = 0;
9703 top->private_pages_resident = entry_size;
9704 } else if (entry->needs_copy) {
9705 top->share_mode = SM_COW;
9706 top->shared_pages_resident =
9707 OBJ_RESIDENT_COUNT(obj, entry_size);
9708 } else {
9709 if (ref_count == 1 ||
9710 (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
9711 top->share_mode = SM_PRIVATE;
9712 top->private_pages_resident =
9713 OBJ_RESIDENT_COUNT(obj,
9714 entry_size);
9715 } else {
9716 top->share_mode = SM_SHARED;
9717 top->shared_pages_resident =
9718 OBJ_RESIDENT_COUNT(obj,
9719 entry_size);
9720 }
9721 }
9722 top->ref_count = ref_count;
9723 }
9724 /* XXX K64: obj_id will be truncated */
9725 top->obj_id = (unsigned int) (uintptr_t)obj;
9726
9727 vm_object_unlock(obj);
9728 }
9729 }
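
/*
 * Quick reference, matching the logic above: an entry with no object
 * (or a submap) reports SM_EMPTY; a superpage mapping reports
 * SM_LARGE_PAGE; an object with a shadow chain, or a needs_copy entry,
 * reports SM_COW; otherwise a ref_count of 1 (or 2 when the object is
 * backed by an untrusted external pager) means SM_PRIVATE, and anything
 * else SM_SHARED.
 */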
9730
9731 void
9732 vm_map_region_walk(
9733 vm_map_t map,
9734 vm_map_offset_t va,
9735 vm_map_entry_t entry,
9736 vm_object_offset_t offset,
9737 vm_object_size_t range,
9738 vm_region_extended_info_t extended,
9739 boolean_t look_for_pages)
9740 {
9741 register struct vm_object *obj, *tmp_obj;
9742 register vm_map_offset_t last_offset;
9743 register int i;
9744 register int ref_count;
9745 struct vm_object *shadow_object;
9746 int shadow_depth;
9747
9748 if ((entry->object.vm_object == 0) ||
9749 (entry->is_sub_map) ||
9750 (entry->object.vm_object->phys_contiguous &&
9751 !entry->superpage_size)) {
9752 extended->share_mode = SM_EMPTY;
9753 extended->ref_count = 0;
9754 return;
9755 }
9756
9757 if (entry->superpage_size) {
9758 extended->shadow_depth = 0;
9759 extended->share_mode = SM_LARGE_PAGE;
9760 extended->ref_count = 1;
9761 extended->external_pager = 0;
9762 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
9763 extended->shadow_depth = 0;
9764 return;
9765 }
9766
9767 {
9768 obj = entry->object.vm_object;
9769
9770 vm_object_lock(obj);
9771
9772 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9773 ref_count--;
9774
9775 if (look_for_pages) {
9776 for (last_offset = offset + range;
9777 offset < last_offset;
9778 offset += PAGE_SIZE_64, va += PAGE_SIZE)
9779 vm_map_region_look_for_page(map, va, obj,
9780 offset, ref_count,
9781 0, extended);
9782 } else {
9783 shadow_object = obj->shadow;
9784 shadow_depth = 0;
9785
9786 if ( !(obj->pager_trusted) && !(obj->internal))
9787 extended->external_pager = 1;
9788
9789 if (shadow_object != VM_OBJECT_NULL) {
9790 vm_object_lock(shadow_object);
9791 for (;
9792 shadow_object != VM_OBJECT_NULL;
9793 shadow_depth++) {
9794 vm_object_t next_shadow;
9795
9796 if ( !(shadow_object->pager_trusted) &&
9797 !(shadow_object->internal))
9798 extended->external_pager = 1;
9799
9800 next_shadow = shadow_object->shadow;
9801 if (next_shadow) {
9802 vm_object_lock(next_shadow);
9803 }
9804 vm_object_unlock(shadow_object);
9805 shadow_object = next_shadow;
9806 }
9807 }
9808 extended->shadow_depth = shadow_depth;
9809 }
9810
9811 if (extended->shadow_depth || entry->needs_copy)
9812 extended->share_mode = SM_COW;
9813 else {
9814 if (ref_count == 1)
9815 extended->share_mode = SM_PRIVATE;
9816 else {
9817 if (obj->true_share)
9818 extended->share_mode = SM_TRUESHARED;
9819 else
9820 extended->share_mode = SM_SHARED;
9821 }
9822 }
9823 extended->ref_count = ref_count - extended->shadow_depth;
9824
9825 for (i = 0; i < extended->shadow_depth; i++) {
9826 if ((tmp_obj = obj->shadow) == 0)
9827 break;
9828 vm_object_lock(tmp_obj);
9829 vm_object_unlock(obj);
9830
9831 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
9832 ref_count--;
9833
9834 extended->ref_count += ref_count;
9835 obj = tmp_obj;
9836 }
9837 vm_object_unlock(obj);
9838
9839 if (extended->share_mode == SM_SHARED) {
9840 register vm_map_entry_t cur;
9841 register vm_map_entry_t last;
9842 int my_refs;
9843
9844 obj = entry->object.vm_object;
9845 last = vm_map_to_entry(map);
9846 my_refs = 0;
9847
9848 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9849 ref_count--;
9850 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
9851 my_refs += vm_map_region_count_obj_refs(cur, obj);
9852
9853 if (my_refs == ref_count)
9854 extended->share_mode = SM_PRIVATE_ALIASED;
9855 else if (my_refs > 1)
9856 extended->share_mode = SM_SHARED_ALIASED;
9857 }
9858 }
9859 }
9860
9861
9862 /* object is locked on entry and locked on return */
9863
9864
9865 static void
9866 vm_map_region_look_for_page(
9867 __unused vm_map_t map,
9868 __unused vm_map_offset_t va,
9869 vm_object_t object,
9870 vm_object_offset_t offset,
9871 int max_refcnt,
9872 int depth,
9873 vm_region_extended_info_t extended)
9874 {
9875 register vm_page_t p;
9876 register vm_object_t shadow;
9877 register int ref_count;
9878 vm_object_t caller_object;
9879 #if MACH_PAGEMAP
9880 kern_return_t kr;
9881 #endif
9882 shadow = object->shadow;
9883 caller_object = object;
9884
9885
9886 while (TRUE) {
9887
9888 if ( !(object->pager_trusted) && !(object->internal))
9889 extended->external_pager = 1;
9890
9891 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
9892 if (shadow && (max_refcnt == 1))
9893 extended->pages_shared_now_private++;
9894
9895 if (!p->fictitious &&
9896 (p->dirty || pmap_is_modified(p->phys_page)))
9897 extended->pages_dirtied++;
9898
9899 extended->pages_resident++;
9900
9901 if(object != caller_object)
9902 vm_object_unlock(object);
9903
9904 return;
9905 }
9906 #if MACH_PAGEMAP
9907 if (object->existence_map) {
9908 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) {
9909
9910 extended->pages_swapped_out++;
9911
9912 if(object != caller_object)
9913 vm_object_unlock(object);
9914
9915 return;
9916 }
9917 } else if (object->internal &&
9918 object->alive &&
9919 !object->terminating &&
9920 object->pager_ready) {
9921
9922 memory_object_t pager;
9923
9924 vm_object_paging_begin(object);
9925 pager = object->pager;
9926 vm_object_unlock(object);
9927
9928 kr = memory_object_data_request(
9929 pager,
9930 offset + object->paging_offset,
9931 0, /* just poke the pager */
9932 VM_PROT_READ,
9933 NULL);
9934
9935 vm_object_lock(object);
9936 vm_object_paging_end(object);
9937
9938 if (kr == KERN_SUCCESS) {
9939 /* the pager has that page */
9940 extended->pages_swapped_out++;
9941 if (object != caller_object)
9942 vm_object_unlock(object);
9943 return;
9944 }
9945 }
9946 #endif /* MACH_PAGEMAP */
9947
9948 if (shadow) {
9949 vm_object_lock(shadow);
9950
9951 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
9952 ref_count--;
9953
9954 if (++depth > extended->shadow_depth)
9955 extended->shadow_depth = depth;
9956
9957 if (ref_count > max_refcnt)
9958 max_refcnt = ref_count;
9959
9960 if(object != caller_object)
9961 vm_object_unlock(object);
9962
9963 offset = offset + object->vo_shadow_offset;
9964 object = shadow;
9965 shadow = object->shadow;
9966 continue;
9967 }
9968 if(object != caller_object)
9969 vm_object_unlock(object);
9970 break;
9971 }
9972 }
9973
9974 static int
9975 vm_map_region_count_obj_refs(
9976 vm_map_entry_t entry,
9977 vm_object_t object)
9978 {
9979 register int ref_count;
9980 register vm_object_t chk_obj;
9981 register vm_object_t tmp_obj;
9982
9983 if (entry->object.vm_object == 0)
9984 return(0);
9985
9986 if (entry->is_sub_map)
9987 return(0);
9988 else {
9989 ref_count = 0;
9990
9991 chk_obj = entry->object.vm_object;
9992 vm_object_lock(chk_obj);
9993
9994 while (chk_obj) {
9995 if (chk_obj == object)
9996 ref_count++;
9997 tmp_obj = chk_obj->shadow;
9998 if (tmp_obj)
9999 vm_object_lock(tmp_obj);
10000 vm_object_unlock(chk_obj);
10001
10002 chk_obj = tmp_obj;
10003 }
10004 }
10005 return(ref_count);
10006 }
10007
10008
10009 /*
10010 * Routine: vm_map_simplify
10011 *
10012 * Description:
10013 * Attempt to simplify the map representation in
10014 * the vicinity of the given starting address.
10015 * Note:
10016 * This routine is intended primarily to keep the
10017 * kernel maps more compact -- they generally don't
10018 * benefit from the "expand a map entry" technology
10019 * at allocation time because the adjacent entry
10020 * is often wired down.
10021 */
10022 void
10023 vm_map_simplify_entry(
10024 vm_map_t map,
10025 vm_map_entry_t this_entry)
10026 {
10027 vm_map_entry_t prev_entry;
10028
10029 counter(c_vm_map_simplify_entry_called++);
10030
10031 prev_entry = this_entry->vme_prev;
10032
10033 if ((this_entry != vm_map_to_entry(map)) &&
10034 (prev_entry != vm_map_to_entry(map)) &&
10035
10036 (prev_entry->vme_end == this_entry->vme_start) &&
10037
10038 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
10039
10040 (prev_entry->object.vm_object == this_entry->object.vm_object) &&
10041 ((prev_entry->offset + (prev_entry->vme_end -
10042 prev_entry->vme_start))
10043 == this_entry->offset) &&
10044
10045 (prev_entry->inheritance == this_entry->inheritance) &&
10046 (prev_entry->protection == this_entry->protection) &&
10047 (prev_entry->max_protection == this_entry->max_protection) &&
10048 (prev_entry->behavior == this_entry->behavior) &&
10049 (prev_entry->alias == this_entry->alias) &&
10050 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
10051 (prev_entry->no_cache == this_entry->no_cache) &&
10052 (prev_entry->wired_count == this_entry->wired_count) &&
10053 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
10054
10055 (prev_entry->needs_copy == this_entry->needs_copy) &&
10056 (prev_entry->permanent == this_entry->permanent) &&
10057
10058 (prev_entry->use_pmap == FALSE) &&
10059 (this_entry->use_pmap == FALSE) &&
10060 (prev_entry->in_transition == FALSE) &&
10061 (this_entry->in_transition == FALSE) &&
10062 (prev_entry->needs_wakeup == FALSE) &&
10063 (this_entry->needs_wakeup == FALSE) &&
10064 (prev_entry->is_shared == FALSE) &&
10065 (this_entry->is_shared == FALSE)
10066 ) {
10067 _vm_map_store_entry_unlink(&map->hdr, prev_entry);
10068 assert(prev_entry->vme_start < this_entry->vme_end);
10069 this_entry->vme_start = prev_entry->vme_start;
10070 this_entry->offset = prev_entry->offset;
10071 if (prev_entry->is_sub_map) {
10072 vm_map_deallocate(prev_entry->object.sub_map);
10073 } else {
10074 vm_object_deallocate(prev_entry->object.vm_object);
10075 }
10076 vm_map_entry_dispose(map, prev_entry);
10077 SAVE_HINT_MAP_WRITE(map, this_entry);
10078 counter(c_vm_map_simplified++);
10079 }
10080 }
10081
10082 void
10083 vm_map_simplify(
10084 vm_map_t map,
10085 vm_map_offset_t start)
10086 {
10087 vm_map_entry_t this_entry;
10088
10089 vm_map_lock(map);
10090 if (vm_map_lookup_entry(map, start, &this_entry)) {
10091 vm_map_simplify_entry(map, this_entry);
10092 vm_map_simplify_entry(map, this_entry->vme_next);
10093 }
10094 counter(c_vm_map_simplify_called++);
10095 vm_map_unlock(map);
10096 }
10097
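/*
 * Illustrative sketch, not part of the original XNU source: a hypothetical
 * caller (the "example_" name is made up) that has just finished carving up
 * a range might ask the map to re-coalesce the entries around both ends of
 * that range.  vm_map_simplify() takes and drops the map lock itself, so the
 * map must not already be locked here.
 */
static void
example_recoalesce_range(
        vm_map_t        map,
        vm_map_offset_t start,
        vm_map_offset_t end)
{
        vm_map_simplify(map, start);
        vm_map_simplify(map, end);
}
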
10098 static void
10099 vm_map_simplify_range(
10100 vm_map_t map,
10101 vm_map_offset_t start,
10102 vm_map_offset_t end)
10103 {
10104 vm_map_entry_t entry;
10105
10106 /*
10107 * The map should be locked (for "write") by the caller.
10108 */
10109
10110 if (start >= end) {
10111 /* invalid address range */
10112 return;
10113 }
10114
10115 start = vm_map_trunc_page(start);
10116 end = vm_map_round_page(end);
10117
10118 if (!vm_map_lookup_entry(map, start, &entry)) {
10119 /* "start" is not mapped and "entry" ends before "start" */
10120 if (entry == vm_map_to_entry(map)) {
10121 /* start with first entry in the map */
10122 entry = vm_map_first_entry(map);
10123 } else {
10124 /* start with next entry */
10125 entry = entry->vme_next;
10126 }
10127 }
10128
10129 while (entry != vm_map_to_entry(map) &&
10130 entry->vme_start <= end) {
10131 /* try and coalesce "entry" with its previous entry */
10132 vm_map_simplify_entry(map, entry);
10133 entry = entry->vme_next;
10134 }
10135 }
10136
10137
10138 /*
10139 * Routine: vm_map_machine_attribute
10140 * Purpose:
10141 * Provide machine-specific attributes to mappings,
10142 * such as cacheability, etc., for machines that provide
10143 * them. NUMA architectures and machines with big/strange
10144 * caches will use this.
10145 * Note:
10146 * Responsibilities for locking and checking are handled here;
10147 * everything else is handled in the pmap module. If any non-volatile
10148 * information must be kept, the pmap module should handle
10149 * it itself. [This assumes that attributes do not
10150 * need to be inherited, which seems ok to me]
10151 */
10152 kern_return_t
10153 vm_map_machine_attribute(
10154 vm_map_t map,
10155 vm_map_offset_t start,
10156 vm_map_offset_t end,
10157 vm_machine_attribute_t attribute,
10158 vm_machine_attribute_val_t* value) /* IN/OUT */
10159 {
10160 kern_return_t ret;
10161 vm_map_size_t sync_size;
10162 vm_map_entry_t entry;
10163
10164 if (start < vm_map_min(map) || end > vm_map_max(map))
10165 return KERN_INVALID_ADDRESS;
10166
10167 /* Figure how much memory we need to flush (in page increments) */
10168 sync_size = end - start;
10169
10170 vm_map_lock(map);
10171
10172 if (attribute != MATTR_CACHE) {
10173 /* If we don't have to find physical addresses, we */
10174 /* don't have to do an explicit traversal here. */
10175 ret = pmap_attribute(map->pmap, start, end-start,
10176 attribute, value);
10177 vm_map_unlock(map);
10178 return ret;
10179 }
10180
10181 ret = KERN_SUCCESS; /* Assume it all worked */
10182
10183 while(sync_size) {
10184 if (vm_map_lookup_entry(map, start, &entry)) {
10185 vm_map_size_t sub_size;
10186 if((entry->vme_end - start) > sync_size) {
10187 sub_size = sync_size;
10188 sync_size = 0;
10189 } else {
10190 sub_size = entry->vme_end - start;
10191 sync_size -= sub_size;
10192 }
10193 if(entry->is_sub_map) {
10194 vm_map_offset_t sub_start;
10195 vm_map_offset_t sub_end;
10196
10197 sub_start = (start - entry->vme_start)
10198 + entry->offset;
10199 sub_end = sub_start + sub_size;
10200 vm_map_machine_attribute(
10201 entry->object.sub_map,
10202 sub_start,
10203 sub_end,
10204 attribute, value);
10205 } else {
10206 if(entry->object.vm_object) {
10207 vm_page_t m;
10208 vm_object_t object;
10209 vm_object_t base_object;
10210 vm_object_t last_object;
10211 vm_object_offset_t offset;
10212 vm_object_offset_t base_offset;
10213 vm_map_size_t range;
10214 range = sub_size;
10215 offset = (start - entry->vme_start)
10216 + entry->offset;
10217 base_offset = offset;
10218 object = entry->object.vm_object;
10219 base_object = object;
10220 last_object = NULL;
10221
10222 vm_object_lock(object);
10223
10224 while (range) {
10225 m = vm_page_lookup(
10226 object, offset);
10227
10228 if (m && !m->fictitious) {
10229 ret =
10230 pmap_attribute_cache_sync(
10231 m->phys_page,
10232 PAGE_SIZE,
10233 attribute, value);
10234
10235 } else if (object->shadow) {
10236 offset = offset + object->vo_shadow_offset;
10237 last_object = object;
10238 object = object->shadow;
10239 vm_object_lock(last_object->shadow);
10240 vm_object_unlock(last_object);
10241 continue;
10242 }
10243 range -= PAGE_SIZE;
10244
10245 if (base_object != object) {
10246 vm_object_unlock(object);
10247 vm_object_lock(base_object);
10248 object = base_object;
10249 }
10250 /* Bump to the next page */
10251 base_offset += PAGE_SIZE;
10252 offset = base_offset;
10253 }
10254 vm_object_unlock(object);
10255 }
10256 }
10257 start += sub_size;
10258 } else {
10259 vm_map_unlock(map);
10260 return KERN_FAILURE;
10261 }
10262
10263 }
10264
10265 vm_map_unlock(map);
10266
10267 return ret;
10268 }
10269
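/*
 * Illustrative sketch, not part of the original XNU source: a hypothetical
 * driver-style caller (the "example_" name is made up) could flush the CPU
 * cache for a mapped range roughly as follows; MATTR_CACHE with
 * MATTR_VAL_CACHE_FLUSH makes the routine above walk the range page by page.
 */
static kern_return_t
example_flush_range_cache(
        vm_map_t        map,
        vm_map_offset_t start,
        vm_map_size_t   size)
{
        vm_machine_attribute_val_t      value = MATTR_VAL_CACHE_FLUSH;

        return vm_map_machine_attribute(map, start, start + size,
                                        MATTR_CACHE, &value);
}
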
10270 /*
10271 * vm_map_behavior_set:
10272 *
10273 * Sets the paging reference behavior of the specified address
10274 * range in the target map. Paging reference behavior affects
10275 * how pagein operations resulting from faults on the map will be
10276 * clustered.
10277 */
10278 kern_return_t
10279 vm_map_behavior_set(
10280 vm_map_t map,
10281 vm_map_offset_t start,
10282 vm_map_offset_t end,
10283 vm_behavior_t new_behavior)
10284 {
10285 register vm_map_entry_t entry;
10286 vm_map_entry_t temp_entry;
10287
10288 XPR(XPR_VM_MAP,
10289 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
10290 map, start, end, new_behavior, 0);
10291
10292 if (start > end ||
10293 start < vm_map_min(map) ||
10294 end > vm_map_max(map)) {
10295 return KERN_NO_SPACE;
10296 }
10297
10298 switch (new_behavior) {
10299
10300 /*
10301 * This first block of behaviors all set a persistent state on the specified
10302 * memory range. All we have to do here is to record the desired behavior
10303 * in the vm_map_entry_t's.
10304 */
10305
10306 case VM_BEHAVIOR_DEFAULT:
10307 case VM_BEHAVIOR_RANDOM:
10308 case VM_BEHAVIOR_SEQUENTIAL:
10309 case VM_BEHAVIOR_RSEQNTL:
10310 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
10311 vm_map_lock(map);
10312
10313 /*
10314 * The entire address range must be valid for the map.
10315 * Note that vm_map_range_check() does a
10316 * vm_map_lookup_entry() internally and returns the
10317 * entry containing the start of the address range if
10318 * the entire range is valid.
10319 */
10320 if (vm_map_range_check(map, start, end, &temp_entry)) {
10321 entry = temp_entry;
10322 vm_map_clip_start(map, entry, start);
10323 }
10324 else {
10325 vm_map_unlock(map);
10326 return(KERN_INVALID_ADDRESS);
10327 }
10328
10329 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
10330 vm_map_clip_end(map, entry, end);
10331 assert(!entry->use_pmap);
10332
10333 if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
10334 entry->zero_wired_pages = TRUE;
10335 } else {
10336 entry->behavior = new_behavior;
10337 }
10338 entry = entry->vme_next;
10339 }
10340
10341 vm_map_unlock(map);
10342 break;
10343
10344 /*
10345 * The rest of these are different from the above in that they cause
10346 * an immediate action to take place as opposed to setting a behavior that
10347 * affects future actions.
10348 */
10349
10350 case VM_BEHAVIOR_WILLNEED:
10351 return vm_map_willneed(map, start, end);
10352
10353 case VM_BEHAVIOR_DONTNEED:
10354 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
10355
10356 case VM_BEHAVIOR_FREE:
10357 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
10358
10359 case VM_BEHAVIOR_REUSABLE:
10360 return vm_map_reusable_pages(map, start, end);
10361
10362 case VM_BEHAVIOR_REUSE:
10363 return vm_map_reuse_pages(map, start, end);
10364
10365 case VM_BEHAVIOR_CAN_REUSE:
10366 return vm_map_can_reuse(map, start, end);
10367
10368 default:
10369 return(KERN_INVALID_ARGUMENT);
10370 }
10371
10372 return(KERN_SUCCESS);
10373 }
10374
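/*
 * Illustrative sketch, not part of the original XNU source: the first group
 * of behaviors above records persistent per-entry state while the later ones
 * act immediately.  A hypothetical caller (the "example_" name is made up)
 * might combine the two styles like this.
 */
static kern_return_t
example_prefetch_then_stream(
        vm_map_t        map,
        vm_map_offset_t start,
        vm_map_offset_t end)
{
        kern_return_t   kr;

        /* immediate action: issue the read-ahead now */
        kr = vm_map_behavior_set(map, start, end, VM_BEHAVIOR_WILLNEED);
        if (kr != KERN_SUCCESS)
                return kr;

        /* persistent state: favor sequential clustering on future faults */
        return vm_map_behavior_set(map, start, end, VM_BEHAVIOR_SEQUENTIAL);
}
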
10375
10376 /*
10377 * Internals for the madvise(MADV_WILLNEED) system call.
10378 *
10379 * The present implementation is to do a read-ahead if the mapping corresponds
10380 * to a mapped regular file. If it's an anonymous mapping, then we do nothing
10381 * and basically ignore the "advice" (which we are always free to do).
10382 */
10383
10384
10385 static kern_return_t
10386 vm_map_willneed(
10387 vm_map_t map,
10388 vm_map_offset_t start,
10389 vm_map_offset_t end
10390 )
10391 {
10392 vm_map_entry_t entry;
10393 vm_object_t object;
10394 memory_object_t pager;
10395 struct vm_object_fault_info fault_info;
10396 kern_return_t kr;
10397 vm_object_size_t len;
10398 vm_object_offset_t offset;
10399
10400 /*
10401 * Fill in static values in fault_info. Several fields get ignored by the code
10402 * we call, but we'll fill them in anyway since uninitialized fields are bad
10403 * when it comes to future backwards compatibility.
10404 */
10405
10406 fault_info.interruptible = THREAD_UNINT; /* ignored value */
10407 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
10408 fault_info.no_cache = FALSE; /* ignored value */
10409 fault_info.stealth = TRUE;
10410 fault_info.io_sync = FALSE;
10411 fault_info.cs_bypass = FALSE;
10412 fault_info.mark_zf_absent = FALSE;
10413
10414 /*
10415 * The MADV_WILLNEED operation doesn't require any changes to the
10416 * vm_map_entry_t's, so the read lock is sufficient.
10417 */
10418
10419 vm_map_lock_read(map);
10420
10421 /*
10422 * The madvise semantics require that the address range be fully
10423 * allocated with no holes. Otherwise, we're required to return
10424 * an error.
10425 */
10426
10427 if (! vm_map_range_check(map, start, end, &entry)) {
10428 vm_map_unlock_read(map);
10429 return KERN_INVALID_ADDRESS;
10430 }
10431
10432 /*
10433 * Examine each vm_map_entry_t in the range.
10434 */
10435 for (; entry != vm_map_to_entry(map) && start < end; ) {
10436
10437 /*
10438 * The first time through, the start address could be anywhere
10439 * within the vm_map_entry we found. So adjust the offset to
10440 * correspond. After that, the offset will always be zero to
10441 * correspond to the beginning of the current vm_map_entry.
10442 */
10443 offset = (start - entry->vme_start) + entry->offset;
10444
10445 /*
10446 * Set the length so we don't go beyond the end of the
10447 * map_entry or beyond the end of the range we were given.
10448 * This range could also span multiple map entries, all of which
10449 * map different files, so make sure we only do the right amount
10450 * of I/O for each object. Note that it's possible for there
10451 * to be multiple map entries all referring to the same object
10452 * but with different page permissions, but it's not worth
10453 * trying to optimize that case.
10454 */
10455 len = MIN(entry->vme_end - start, end - start);
10456
10457 if ((vm_size_t) len != len) {
10458 /* 32-bit overflow */
10459 len = (vm_size_t) (0 - PAGE_SIZE);
10460 }
10461 fault_info.cluster_size = (vm_size_t) len;
10462 fault_info.lo_offset = offset;
10463 fault_info.hi_offset = offset + len;
10464 fault_info.user_tag = entry->alias;
10465
10466 /*
10467 * If there's no read permission to this mapping, then just
10468 * skip it.
10469 */
10470 if ((entry->protection & VM_PROT_READ) == 0) {
10471 entry = entry->vme_next;
10472 start = entry->vme_start;
10473 continue;
10474 }
10475
10476 /*
10477 * Find the file object backing this map entry. If there is
10478 * none, then we simply ignore the "will need" advice for this
10479 * entry and go on to the next one.
10480 */
10481 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
10482 entry = entry->vme_next;
10483 start = entry->vme_start;
10484 continue;
10485 }
10486
10487 /*
10488 * The data_request() could take a long time, so let's
10489 * release the map lock to avoid blocking other threads.
10490 */
10491 vm_map_unlock_read(map);
10492
10493 vm_object_paging_begin(object);
10494 pager = object->pager;
10495 vm_object_unlock(object);
10496
10497 /*
10498 * Get the data from the object asynchronously.
10499 *
10500 * Note that memory_object_data_request() places limits on the
10501 * amount of I/O it will do. Regardless of the len we
10502 * specified, it won't do more than MAX_UPL_TRANSFER and it
10503 * silently truncates the len to that size. This isn't
10504 * necessarily bad since madvise shouldn't really be used to
10505 * page in unlimited amounts of data. Other Unix variants
10506 * limit the willneed case as well. If this turns out to be an
10507 * issue for developers, then we can always adjust the policy
10508 * here and still be backwards compatible since this is all
10509 * just "advice".
10510 */
10511 kr = memory_object_data_request(
10512 pager,
10513 offset + object->paging_offset,
10514 0, /* ignored */
10515 VM_PROT_READ,
10516 (memory_object_fault_info_t)&fault_info);
10517
10518 vm_object_lock(object);
10519 vm_object_paging_end(object);
10520 vm_object_unlock(object);
10521
10522 /*
10523 * If we couldn't do the I/O for some reason, just give up on
10524 * the madvise. We still return success to the user since
10525 * madvise isn't supposed to fail when the advice can't be
10526 * taken.
10527 */
10528 if (kr != KERN_SUCCESS) {
10529 return KERN_SUCCESS;
10530 }
10531
10532 start += len;
10533 if (start >= end) {
10534 /* done */
10535 return KERN_SUCCESS;
10536 }
10537
10538 /* look up next entry */
10539 vm_map_lock_read(map);
10540 if (! vm_map_lookup_entry(map, start, &entry)) {
10541 /*
10542 * There's a new hole in the address range.
10543 */
10544 vm_map_unlock_read(map);
10545 return KERN_INVALID_ADDRESS;
10546 }
10547 }
10548
10549 vm_map_unlock_read(map);
10550 return KERN_SUCCESS;
10551 }
10552
10553 static boolean_t
10554 vm_map_entry_is_reusable(
10555 vm_map_entry_t entry)
10556 {
10557 vm_object_t object;
10558
10559 if (entry->is_shared ||
10560 entry->is_sub_map ||
10561 entry->in_transition ||
10562 entry->protection != VM_PROT_DEFAULT ||
10563 entry->max_protection != VM_PROT_ALL ||
10564 entry->inheritance != VM_INHERIT_DEFAULT ||
10565 entry->no_cache ||
10566 entry->permanent ||
10567 entry->superpage_size != 0 ||
10568 entry->zero_wired_pages ||
10569 entry->wired_count != 0 ||
10570 entry->user_wired_count != 0) {
10571 return FALSE;
10572 }
10573
10574 object = entry->object.vm_object;
10575 if (object == VM_OBJECT_NULL) {
10576 return TRUE;
10577 }
10578 if (object->ref_count == 1 &&
10579 object->wired_page_count == 0 &&
10580 object->copy == VM_OBJECT_NULL &&
10581 object->shadow == VM_OBJECT_NULL &&
10582 object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
10583 object->internal &&
10584 !object->true_share &&
10585 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
10586 !object->code_signed) {
10587 return TRUE;
10588 }
10589 return FALSE;
10590
10591
10592 }
10593
10594 static kern_return_t
10595 vm_map_reuse_pages(
10596 vm_map_t map,
10597 vm_map_offset_t start,
10598 vm_map_offset_t end)
10599 {
10600 vm_map_entry_t entry;
10601 vm_object_t object;
10602 vm_object_offset_t start_offset, end_offset;
10603
10604 /*
10605 * The MADV_REUSE operation doesn't require any changes to the
10606 * vm_map_entry_t's, so the read lock is sufficient.
10607 */
10608
10609 vm_map_lock_read(map);
10610
10611 /*
10612 * The madvise semantics require that the address range be fully
10613 * allocated with no holes. Otherwise, we're required to return
10614 * an error.
10615 */
10616
10617 if (!vm_map_range_check(map, start, end, &entry)) {
10618 vm_map_unlock_read(map);
10619 vm_page_stats_reusable.reuse_pages_failure++;
10620 return KERN_INVALID_ADDRESS;
10621 }
10622
10623 /*
10624 * Examine each vm_map_entry_t in the range.
10625 */
10626 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10627 entry = entry->vme_next) {
10628 /*
10629 * Sanity check on the VM map entry.
10630 */
10631 if (! vm_map_entry_is_reusable(entry)) {
10632 vm_map_unlock_read(map);
10633 vm_page_stats_reusable.reuse_pages_failure++;
10634 return KERN_INVALID_ADDRESS;
10635 }
10636
10637 /*
10638 * The first time through, the start address could be anywhere
10639 * within the vm_map_entry we found. So adjust the offset to
10640 * correspond.
10641 */
10642 if (entry->vme_start < start) {
10643 start_offset = start - entry->vme_start;
10644 } else {
10645 start_offset = 0;
10646 }
10647 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
10648 start_offset += entry->offset;
10649 end_offset += entry->offset;
10650
10651 object = entry->object.vm_object;
10652 if (object != VM_OBJECT_NULL) {
10653 vm_object_lock(object);
10654 vm_object_reuse_pages(object, start_offset, end_offset,
10655 TRUE);
10656 vm_object_unlock(object);
10657 }
10658
10659 if (entry->alias == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
10660 /*
10661 * XXX
10662 * We do not hold the VM map exclusively here.
10663 * The "alias" field is not that critical, so it's
10664 * safe to update it here, as long as it is the only
10665 * one that can be modified while holding the VM map
10666 * "shared".
10667 */
10668 entry->alias = VM_MEMORY_MALLOC_LARGE_REUSED;
10669 }
10670 }
10671
10672 vm_map_unlock_read(map);
10673 vm_page_stats_reusable.reuse_pages_success++;
10674 return KERN_SUCCESS;
10675 }
10676
10677
10678 static kern_return_t
10679 vm_map_reusable_pages(
10680 vm_map_t map,
10681 vm_map_offset_t start,
10682 vm_map_offset_t end)
10683 {
10684 vm_map_entry_t entry;
10685 vm_object_t object;
10686 vm_object_offset_t start_offset, end_offset;
10687
10688 /*
10689 * The MADV_REUSABLE operation doesn't require any changes to the
10690 * vm_map_entry_t's, so the read lock is sufficient.
10691 */
10692
10693 vm_map_lock_read(map);
10694
10695 /*
10696 * The madvise semantics require that the address range be fully
10697 * allocated with no holes. Otherwise, we're required to return
10698 * an error.
10699 */
10700
10701 if (!vm_map_range_check(map, start, end, &entry)) {
10702 vm_map_unlock_read(map);
10703 vm_page_stats_reusable.reusable_pages_failure++;
10704 return KERN_INVALID_ADDRESS;
10705 }
10706
10707 /*
10708 * Examine each vm_map_entry_t in the range.
10709 */
10710 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10711 entry = entry->vme_next) {
10712 int kill_pages = 0;
10713
10714 /*
10715 * Sanity check on the VM map entry.
10716 */
10717 if (! vm_map_entry_is_reusable(entry)) {
10718 vm_map_unlock_read(map);
10719 vm_page_stats_reusable.reusable_pages_failure++;
10720 return KERN_INVALID_ADDRESS;
10721 }
10722
10723 /*
10724 * The first time through, the start address could be anywhere
10725 * within the vm_map_entry we found. So adjust the offset to
10726 * correspond.
10727 */
10728 if (entry->vme_start < start) {
10729 start_offset = start - entry->vme_start;
10730 } else {
10731 start_offset = 0;
10732 }
10733 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
10734 start_offset += entry->offset;
10735 end_offset += entry->offset;
10736
10737 object = entry->object.vm_object;
10738 if (object == VM_OBJECT_NULL)
10739 continue;
10740
10741
10742 vm_object_lock(object);
10743 if (object->ref_count == 1 && !object->shadow)
10744 kill_pages = 1;
10745 else
10746 kill_pages = -1;
10747 if (kill_pages != -1) {
10748 vm_object_deactivate_pages(object,
10749 start_offset,
10750 end_offset - start_offset,
10751 kill_pages,
10752 TRUE /*reusable_pages*/);
10753 } else {
10754 vm_page_stats_reusable.reusable_pages_shared++;
10755 }
10756 vm_object_unlock(object);
10757
10758 if (entry->alias == VM_MEMORY_MALLOC_LARGE ||
10759 entry->alias == VM_MEMORY_MALLOC_LARGE_REUSED) {
10760 /*
10761 * XXX
10762 * We do not hold the VM map exclusively here.
10763 * The "alias" field is not that critical, so it's
10764 * safe to update it here, as long as it is the only
10765 * one that can be modified while holding the VM map
10766 * "shared".
10767 */
10768 entry->alias = VM_MEMORY_MALLOC_LARGE_REUSABLE;
10769 }
10770 }
10771
10772 vm_map_unlock_read(map);
10773 vm_page_stats_reusable.reusable_pages_success++;
10774 return KERN_SUCCESS;
10775 }
10776
10777
10778 static kern_return_t
10779 vm_map_can_reuse(
10780 vm_map_t map,
10781 vm_map_offset_t start,
10782 vm_map_offset_t end)
10783 {
10784 vm_map_entry_t entry;
10785
10786 /*
10787 * The MADV_CAN_REUSE operation doesn't require any changes to the
10788 * vm_map_entry_t's, so the read lock is sufficient.
10789 */
10790
10791 vm_map_lock_read(map);
10792
10793 /*
10794 * The madvise semantics require that the address range be fully
10795 * allocated with no holes. Otherwise, we're required to return
10796 * an error.
10797 */
10798
10799 if (!vm_map_range_check(map, start, end, &entry)) {
10800 vm_map_unlock_read(map);
10801 vm_page_stats_reusable.can_reuse_failure++;
10802 return KERN_INVALID_ADDRESS;
10803 }
10804
10805 /*
10806 * Examine each vm_map_entry_t in the range.
10807 */
10808 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10809 entry = entry->vme_next) {
10810 /*
10811 * Sanity check on the VM map entry.
10812 */
10813 if (! vm_map_entry_is_reusable(entry)) {
10814 vm_map_unlock_read(map);
10815 vm_page_stats_reusable.can_reuse_failure++;
10816 return KERN_INVALID_ADDRESS;
10817 }
10818 }
10819
10820 vm_map_unlock_read(map);
10821 vm_page_stats_reusable.can_reuse_success++;
10822 return KERN_SUCCESS;
10823 }
10824
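/*
 * Illustrative sketch, not part of the original XNU source: the three static
 * helpers above are reached through vm_map_behavior_set().  A hypothetical
 * malloc-style client (the "example_" name is made up) might mark an idle
 * region reusable and later reclaim it like this.
 */
static kern_return_t
example_recycle_region(
        vm_map_t        map,
        vm_map_offset_t start,
        vm_map_offset_t end)
{
        kern_return_t   kr;

        /* the contents are no longer needed; let the VM reclaim the pages */
        kr = vm_map_behavior_set(map, start, end, VM_BEHAVIOR_REUSABLE);
        if (kr != KERN_SUCCESS)
                return kr;

        /* ... later, just before the region is handed out again ... */
        return vm_map_behavior_set(map, start, end, VM_BEHAVIOR_REUSE);
}
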
10825
10826
10827 #include <mach_kdb.h>
10828 #if MACH_KDB
10829 #include <ddb/db_output.h>
10830 #include <vm/vm_print.h>
10831
10832 #define printf db_printf
10833
10834 /*
10835 * Forward declarations for internal functions.
10836 */
10837 extern void vm_map_links_print(
10838 struct vm_map_links *links);
10839
10840 extern void vm_map_header_print(
10841 struct vm_map_header *header);
10842
10843 extern void vm_map_entry_print(
10844 vm_map_entry_t entry);
10845
10846 extern void vm_follow_entry(
10847 vm_map_entry_t entry);
10848
10849 extern void vm_follow_map(
10850 vm_map_t map);
10851
10852 /*
10853 * vm_map_links_print: [ debug ]
10854 */
10855 void
10856 vm_map_links_print(
10857 struct vm_map_links *links)
10858 {
10859 iprintf("prev = %08X next = %08X start = %016llX end = %016llX\n",
10860 links->prev,
10861 links->next,
10862 (unsigned long long)links->start,
10863 (unsigned long long)links->end);
10864 }
10865
10866 /*
10867 * vm_map_header_print: [ debug ]
10868 */
10869 void
10870 vm_map_header_print(
10871 struct vm_map_header *header)
10872 {
10873 vm_map_links_print(&header->links);
10874 iprintf("nentries = %08X, %sentries_pageable\n",
10875 header->nentries,
10876 (header->entries_pageable ? "" : "!"));
10877 }
10878
10879 /*
10880 * vm_follow_entry: [ debug ]
10881 */
10882 void
10883 vm_follow_entry(
10884 vm_map_entry_t entry)
10885 {
10886 int shadows;
10887
10888 iprintf("map entry %08X\n", entry);
10889
10890 db_indent += 2;
10891
10892 shadows = vm_follow_object(entry->object.vm_object);
10893 iprintf("Total objects : %d\n",shadows);
10894
10895 db_indent -= 2;
10896 }
10897
10898 /*
10899 * vm_map_entry_print: [ debug ]
10900 */
10901 void
10902 vm_map_entry_print(
10903 register vm_map_entry_t entry)
10904 {
10905 static const char *inheritance_name[4] =
10906 { "share", "copy", "none", "?"};
10907 static const char *behavior_name[4] =
10908 { "dflt", "rand", "seqtl", "rseqntl" };
10909
10910 iprintf("map entry %08X - prev = %08X next = %08X\n", entry, entry->vme_prev, entry->vme_next);
10911
10912 db_indent += 2;
10913
10914 vm_map_links_print(&entry->links);
10915
10916 iprintf("start = %016llX end = %016llX - prot=%x/%x/%s\n",
10917 (unsigned long long)entry->vme_start,
10918 (unsigned long long)entry->vme_end,
10919 entry->protection,
10920 entry->max_protection,
10921 inheritance_name[(entry->inheritance & 0x3)]);
10922
10923 iprintf("behavior = %s, wired_count = %d, user_wired_count = %d\n",
10924 behavior_name[(entry->behavior & 0x3)],
10925 entry->wired_count,
10926 entry->user_wired_count);
10927 iprintf("%sin_transition, %sneeds_wakeup\n",
10928 (entry->in_transition ? "" : "!"),
10929 (entry->needs_wakeup ? "" : "!"));
10930
10931 if (entry->is_sub_map) {
10932 iprintf("submap = %08X - offset = %016llX\n",
10933 entry->object.sub_map,
10934 (unsigned long long)entry->offset);
10935 } else {
10936 iprintf("object = %08X offset = %016llX - ",
10937 entry->object.vm_object,
10938 (unsigned long long)entry->offset);
10939 printf("%sis_shared, %sneeds_copy\n",
10940 (entry->is_shared ? "" : "!"),
10941 (entry->needs_copy ? "" : "!"));
10942 }
10943
10944 db_indent -= 2;
10945 }
10946
10947 /*
10948 * vm_follow_map: [ debug ]
10949 */
10950 void
10951 vm_follow_map(
10952 vm_map_t map)
10953 {
10954 register vm_map_entry_t entry;
10955
10956 iprintf("task map %08X\n", map);
10957
10958 db_indent += 2;
10959
10960 for (entry = vm_map_first_entry(map);
10961 entry && entry != vm_map_to_entry(map);
10962 entry = entry->vme_next) {
10963 vm_follow_entry(entry);
10964 }
10965
10966 db_indent -= 2;
10967 }
10968
10969 /*
10970 * vm_map_print: [ debug ]
10971 */
10972 void
10973 vm_map_print(
10974 db_addr_t inmap)
10975 {
10976 register vm_map_entry_t entry;
10977 vm_map_t map;
10978 #if TASK_SWAPPER
10979 char *swstate;
10980 #endif /* TASK_SWAPPER */
10981
10982 map = (vm_map_t)(long)
10983 inmap; /* Make sure we have the right type */
10984
10985 iprintf("task map %08X\n", map);
10986
10987 db_indent += 2;
10988
10989 vm_map_header_print(&map->hdr);
10990
10991 iprintf("pmap = %08X size = %08X ref = %d hint = %08X first_free = %08X\n",
10992 map->pmap,
10993 map->size,
10994 map->ref_count,
10995 map->hint,
10996 map->first_free);
10997
10998 iprintf("%swait_for_space, %swiring_required, timestamp = %d\n",
10999 (map->wait_for_space ? "" : "!"),
11000 (map->wiring_required ? "" : "!"),
11001 map->timestamp);
11002
11003 #if TASK_SWAPPER
11004 switch (map->sw_state) {
11005 case MAP_SW_IN:
11006 swstate = "SW_IN";
11007 break;
11008 case MAP_SW_OUT:
11009 swstate = "SW_OUT";
11010 break;
11011 default:
11012 swstate = "????";
11013 break;
11014 }
11015 iprintf("res = %d, sw_state = %s\n", map->res_count, swstate);
11016 #endif /* TASK_SWAPPER */
11017
11018 for (entry = vm_map_first_entry(map);
11019 entry && entry != vm_map_to_entry(map);
11020 entry = entry->vme_next) {
11021 vm_map_entry_print(entry);
11022 }
11023
11024 db_indent -= 2;
11025 }
11026
11027 /*
11028 * Routine: vm_map_copy_print
11029 * Purpose:
11030 * Pretty-print a copy object for ddb.
11031 */
11032
11033 void
11034 vm_map_copy_print(
11035 db_addr_t incopy)
11036 {
11037 vm_map_copy_t copy;
11038 vm_map_entry_t entry;
11039
11040 copy = (vm_map_copy_t)(long)
11041 incopy; /* Make sure we have the right type */
11042
11043 printf("copy object 0x%x\n", copy);
11044
11045 db_indent += 2;
11046
11047 iprintf("type=%d", copy->type);
11048 switch (copy->type) {
11049 case VM_MAP_COPY_ENTRY_LIST:
11050 printf("[entry_list]");
11051 break;
11052
11053 case VM_MAP_COPY_OBJECT:
11054 printf("[object]");
11055 break;
11056
11057 case VM_MAP_COPY_KERNEL_BUFFER:
11058 printf("[kernel_buffer]");
11059 break;
11060
11061 default:
11062 printf("[bad type]");
11063 break;
11064 }
11065 printf(", offset=0x%llx", (unsigned long long)copy->offset);
11066 printf(", size=0x%x\n", copy->size);
11067
11068 switch (copy->type) {
11069 case VM_MAP_COPY_ENTRY_LIST:
11070 vm_map_header_print(&copy->cpy_hdr);
11071 for (entry = vm_map_copy_first_entry(copy);
11072 entry && entry != vm_map_copy_to_entry(copy);
11073 entry = entry->vme_next) {
11074 vm_map_entry_print(entry);
11075 }
11076 break;
11077
11078 case VM_MAP_COPY_OBJECT:
11079 iprintf("object=0x%x\n", copy->cpy_object);
11080 break;
11081
11082 case VM_MAP_COPY_KERNEL_BUFFER:
11083 iprintf("kernel buffer=0x%x", copy->cpy_kdata);
11084 printf(", kalloc_size=0x%x\n", copy->cpy_kalloc_size);
11085 break;
11086
11087 }
11088
11089 db_indent -=2;
11090 }
11091
11092 /*
11093 * db_vm_map_total_size(map) [ debug ]
11094 *
11095 * return the total virtual size (in bytes) of the map
11096 */
11097 vm_map_size_t
11098 db_vm_map_total_size(
11099 db_addr_t inmap)
11100 {
11101 vm_map_entry_t entry;
11102 vm_map_size_t total;
11103 vm_map_t map;
11104
11105 map = (vm_map_t)(long)
11106 inmap; /* Make sure we have the right type */
11107
11108 total = 0;
11109 for (entry = vm_map_first_entry(map);
11110 entry != vm_map_to_entry(map);
11111 entry = entry->vme_next) {
11112 total += entry->vme_end - entry->vme_start;
11113 }
11114
11115 return total;
11116 }
11117
11118 #endif /* MACH_KDB */
11119
11120 /*
11121 * Routine: vm_map_entry_insert
11122 *
11123 * Description: This routine inserts a new vm_map_entry in a locked map.
11124 */
11125 vm_map_entry_t
11126 vm_map_entry_insert(
11127 vm_map_t map,
11128 vm_map_entry_t insp_entry,
11129 vm_map_offset_t start,
11130 vm_map_offset_t end,
11131 vm_object_t object,
11132 vm_object_offset_t offset,
11133 boolean_t needs_copy,
11134 boolean_t is_shared,
11135 boolean_t in_transition,
11136 vm_prot_t cur_protection,
11137 vm_prot_t max_protection,
11138 vm_behavior_t behavior,
11139 vm_inherit_t inheritance,
11140 unsigned wired_count,
11141 boolean_t no_cache,
11142 boolean_t permanent,
11143 unsigned int superpage_size)
11144 {
11145 vm_map_entry_t new_entry;
11146
11147 assert(insp_entry != (vm_map_entry_t)0);
11148
11149 new_entry = vm_map_entry_create(map);
11150
11151 new_entry->vme_start = start;
11152 new_entry->vme_end = end;
11153 assert(page_aligned(new_entry->vme_start));
11154 assert(page_aligned(new_entry->vme_end));
11155 assert(new_entry->vme_start < new_entry->vme_end);
11156
11157 new_entry->object.vm_object = object;
11158 new_entry->offset = offset;
11159 new_entry->is_shared = is_shared;
11160 new_entry->is_sub_map = FALSE;
11161 new_entry->needs_copy = needs_copy;
11162 new_entry->in_transition = in_transition;
11163 new_entry->needs_wakeup = FALSE;
11164 new_entry->inheritance = inheritance;
11165 new_entry->protection = cur_protection;
11166 new_entry->max_protection = max_protection;
11167 new_entry->behavior = behavior;
11168 new_entry->wired_count = wired_count;
11169 new_entry->user_wired_count = 0;
11170 new_entry->use_pmap = FALSE;
11171 new_entry->alias = 0;
11172 new_entry->zero_wired_pages = FALSE;
11173 new_entry->no_cache = no_cache;
11174 new_entry->permanent = permanent;
11175 new_entry->superpage_size = superpage_size;
11176 new_entry->used_for_jit = FALSE;
11177
11178 /*
11179 * Insert the new entry into the list.
11180 */
11181
11182 vm_map_store_entry_link(map, insp_entry, new_entry);
11183 map->size += end - start;
11184
11185 /*
11186 * Update the free space hint and the lookup hint.
11187 */
11188
11189 SAVE_HINT_MAP_WRITE(map, new_entry);
11190 return new_entry;
11191 }
11192
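/*
 * Illustrative sketch, not part of the original XNU source: a hypothetical
 * caller (the "example_" name is made up) holding the map write-locked could
 * insert a fresh anonymous entry after "insp_entry" like this.  "start" and
 * "end" must be page aligned, with start < end.
 */
static vm_map_entry_t
example_insert_anonymous_entry(
        vm_map_t        map,
        vm_map_entry_t  insp_entry,
        vm_map_offset_t start,
        vm_map_offset_t end)
{
        return vm_map_entry_insert(map, insp_entry, start, end,
                                   VM_OBJECT_NULL,      /* object */
                                   0,                   /* offset */
                                   FALSE,               /* needs_copy */
                                   FALSE,               /* is_shared */
                                   FALSE,               /* in_transition */
                                   VM_PROT_DEFAULT, VM_PROT_ALL,
                                   VM_BEHAVIOR_DEFAULT, VM_INHERIT_DEFAULT,
                                   0,                   /* wired_count */
                                   FALSE,               /* no_cache */
                                   FALSE,               /* permanent */
                                   0);                  /* superpage_size */
}
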
11193 /*
11194 * Routine: vm_map_remap_extract
11195 *
11196 * Description: This routine returns a vm_map_entry list from a map.
11197 */
11198 static kern_return_t
11199 vm_map_remap_extract(
11200 vm_map_t map,
11201 vm_map_offset_t addr,
11202 vm_map_size_t size,
11203 boolean_t copy,
11204 struct vm_map_header *map_header,
11205 vm_prot_t *cur_protection,
11206 vm_prot_t *max_protection,
11207 /* What, no behavior? */
11208 vm_inherit_t inheritance,
11209 boolean_t pageable)
11210 {
11211 kern_return_t result;
11212 vm_map_size_t mapped_size;
11213 vm_map_size_t tmp_size;
11214 vm_map_entry_t src_entry; /* result of last map lookup */
11215 vm_map_entry_t new_entry;
11216 vm_object_offset_t offset;
11217 vm_map_offset_t map_address;
11218 vm_map_offset_t src_start; /* start of entry to map */
11219 vm_map_offset_t src_end; /* end of region to be mapped */
11220 vm_object_t object;
11221 vm_map_version_t version;
11222 boolean_t src_needs_copy;
11223 boolean_t new_entry_needs_copy;
11224
11225 assert(map != VM_MAP_NULL);
11226 assert(size != 0 && size == vm_map_round_page(size));
11227 assert(inheritance == VM_INHERIT_NONE ||
11228 inheritance == VM_INHERIT_COPY ||
11229 inheritance == VM_INHERIT_SHARE);
11230
11231 /*
11232 * Compute start and end of region.
11233 */
11234 src_start = vm_map_trunc_page(addr);
11235 src_end = vm_map_round_page(src_start + size);
11236
11237 /*
11238 * Initialize map_header.
11239 */
11240 map_header->links.next = (struct vm_map_entry *)&map_header->links;
11241 map_header->links.prev = (struct vm_map_entry *)&map_header->links;
11242 map_header->nentries = 0;
11243 map_header->entries_pageable = pageable;
11244
11245 vm_map_store_init( map_header );
11246
11247 *cur_protection = VM_PROT_ALL;
11248 *max_protection = VM_PROT_ALL;
11249
11250 map_address = 0;
11251 mapped_size = 0;
11252 result = KERN_SUCCESS;
11253
11254 /*
11255 * The specified source virtual space might correspond to
11256 * multiple map entries, so we need to loop over them.
11257 */
11258 vm_map_lock(map);
11259 while (mapped_size != size) {
11260 vm_map_size_t entry_size;
11261
11262 /*
11263 * Find the beginning of the region.
11264 */
11265 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
11266 result = KERN_INVALID_ADDRESS;
11267 break;
11268 }
11269
11270 if (src_start < src_entry->vme_start ||
11271 (mapped_size && src_start != src_entry->vme_start)) {
11272 result = KERN_INVALID_ADDRESS;
11273 break;
11274 }
11275
11276 tmp_size = size - mapped_size;
11277 if (src_end > src_entry->vme_end)
11278 tmp_size -= (src_end - src_entry->vme_end);
11279
11280 entry_size = (vm_map_size_t)(src_entry->vme_end -
11281 src_entry->vme_start);
11282
11283 if(src_entry->is_sub_map) {
11284 vm_map_reference(src_entry->object.sub_map);
11285 object = VM_OBJECT_NULL;
11286 } else {
11287 object = src_entry->object.vm_object;
11288
11289 if (object == VM_OBJECT_NULL) {
11290 object = vm_object_allocate(entry_size);
11291 src_entry->offset = 0;
11292 src_entry->object.vm_object = object;
11293 } else if (object->copy_strategy !=
11294 MEMORY_OBJECT_COPY_SYMMETRIC) {
11295 /*
11296 * We are already using an asymmetric
11297 * copy, and therefore we already have
11298 * the right object.
11299 */
11300 assert(!src_entry->needs_copy);
11301 } else if (src_entry->needs_copy || object->shadowed ||
11302 (object->internal && !object->true_share &&
11303 !src_entry->is_shared &&
11304 object->vo_size > entry_size)) {
11305
11306 vm_object_shadow(&src_entry->object.vm_object,
11307 &src_entry->offset,
11308 entry_size);
11309
11310 if (!src_entry->needs_copy &&
11311 (src_entry->protection & VM_PROT_WRITE)) {
11312 vm_prot_t prot;
11313
11314 prot = src_entry->protection & ~VM_PROT_WRITE;
11315
11316 if (override_nx(map, src_entry->alias) && prot)
11317 prot |= VM_PROT_EXECUTE;
11318
11319 if(map->mapped) {
11320 vm_object_pmap_protect(
11321 src_entry->object.vm_object,
11322 src_entry->offset,
11323 entry_size,
11324 PMAP_NULL,
11325 src_entry->vme_start,
11326 prot);
11327 } else {
11328 pmap_protect(vm_map_pmap(map),
11329 src_entry->vme_start,
11330 src_entry->vme_end,
11331 prot);
11332 }
11333 }
11334
11335 object = src_entry->object.vm_object;
11336 src_entry->needs_copy = FALSE;
11337 }
11338
11339
11340 vm_object_lock(object);
11341 vm_object_reference_locked(object); /* object ref. for new entry */
11342 if (object->copy_strategy ==
11343 MEMORY_OBJECT_COPY_SYMMETRIC) {
11344 object->copy_strategy =
11345 MEMORY_OBJECT_COPY_DELAY;
11346 }
11347 vm_object_unlock(object);
11348 }
11349
11350 offset = src_entry->offset + (src_start - src_entry->vme_start);
11351
11352 new_entry = _vm_map_entry_create(map_header);
11353 vm_map_entry_copy(new_entry, src_entry);
11354 new_entry->use_pmap = FALSE; /* clr address space specifics */
11355
11356 new_entry->vme_start = map_address;
11357 new_entry->vme_end = map_address + tmp_size;
11358 assert(new_entry->vme_start < new_entry->vme_end);
11359 new_entry->inheritance = inheritance;
11360 new_entry->offset = offset;
11361
11362 /*
11363 * The new region has to be copied now if required.
11364 */
11365 RestartCopy:
11366 if (!copy) {
11367 src_entry->is_shared = TRUE;
11368 new_entry->is_shared = TRUE;
11369 if (!(new_entry->is_sub_map))
11370 new_entry->needs_copy = FALSE;
11371
11372 } else if (src_entry->is_sub_map) {
11373 /* make this a COW sub_map if not already */
11374 new_entry->needs_copy = TRUE;
11375 object = VM_OBJECT_NULL;
11376 } else if (src_entry->wired_count == 0 &&
11377 vm_object_copy_quickly(&new_entry->object.vm_object,
11378 new_entry->offset,
11379 (new_entry->vme_end -
11380 new_entry->vme_start),
11381 &src_needs_copy,
11382 &new_entry_needs_copy)) {
11383
11384 new_entry->needs_copy = new_entry_needs_copy;
11385 new_entry->is_shared = FALSE;
11386
11387 /*
11388 * Handle copy_on_write semantics.
11389 */
11390 if (src_needs_copy && !src_entry->needs_copy) {
11391 vm_prot_t prot;
11392
11393 prot = src_entry->protection & ~VM_PROT_WRITE;
11394
11395 if (override_nx(map, src_entry->alias) && prot)
11396 prot |= VM_PROT_EXECUTE;
11397
11398 vm_object_pmap_protect(object,
11399 offset,
11400 entry_size,
11401 ((src_entry->is_shared
11402 || map->mapped) ?
11403 PMAP_NULL : map->pmap),
11404 src_entry->vme_start,
11405 prot);
11406
11407 src_entry->needs_copy = TRUE;
11408 }
11409 /*
11410 * Throw away the old object reference of the new entry.
11411 */
11412 vm_object_deallocate(object);
11413
11414 } else {
11415 new_entry->is_shared = FALSE;
11416
11417 /*
11418 * The map can be safely unlocked since we
11419 * already hold a reference on the object.
11420 *
11421 * Record the timestamp of the map for later
11422 * verification, and unlock the map.
11423 */
11424 version.main_timestamp = map->timestamp;
11425 vm_map_unlock(map); /* Increments timestamp once! */
11426
11427 /*
11428 * Perform the copy.
11429 */
11430 if (src_entry->wired_count > 0) {
11431 vm_object_lock(object);
11432 result = vm_object_copy_slowly(
11433 object,
11434 offset,
11435 entry_size,
11436 THREAD_UNINT,
11437 &new_entry->object.vm_object);
11438
11439 new_entry->offset = 0;
11440 new_entry->needs_copy = FALSE;
11441 } else {
11442 result = vm_object_copy_strategically(
11443 object,
11444 offset,
11445 entry_size,
11446 &new_entry->object.vm_object,
11447 &new_entry->offset,
11448 &new_entry_needs_copy);
11449
11450 new_entry->needs_copy = new_entry_needs_copy;
11451 }
11452
11453 /*
11454 * Throw away the old object reference of the new entry.
11455 */
11456 vm_object_deallocate(object);
11457
11458 if (result != KERN_SUCCESS &&
11459 result != KERN_MEMORY_RESTART_COPY) {
11460 _vm_map_entry_dispose(map_header, new_entry);
11461 break;
11462 }
11463
11464 /*
11465 * Verify that the map has not substantially
11466 * changed while the copy was being made.
11467 */
11468
11469 vm_map_lock(map);
11470 if (version.main_timestamp + 1 != map->timestamp) {
11471 /*
11472 * Simple version comparison failed.
11473 *
11474 * Retry the lookup and verify that the
11475 * same object/offset are still present.
11476 */
11477 vm_object_deallocate(new_entry->
11478 object.vm_object);
11479 _vm_map_entry_dispose(map_header, new_entry);
11480 if (result == KERN_MEMORY_RESTART_COPY)
11481 result = KERN_SUCCESS;
11482 continue;
11483 }
11484
11485 if (result == KERN_MEMORY_RESTART_COPY) {
11486 vm_object_reference(object);
11487 goto RestartCopy;
11488 }
11489 }
11490
11491 _vm_map_store_entry_link(map_header,
11492 map_header->links.prev, new_entry);
11493
11494 /* Protections for submap mapping are irrelevant here */
11495 if( !src_entry->is_sub_map ) {
11496 *cur_protection &= src_entry->protection;
11497 *max_protection &= src_entry->max_protection;
11498 }
11499 map_address += tmp_size;
11500 mapped_size += tmp_size;
11501 src_start += tmp_size;
11502
11503 } /* end while */
11504
11505 vm_map_unlock(map);
11506 if (result != KERN_SUCCESS) {
11507 /*
11508 * Free all allocated elements.
11509 */
11510 for (src_entry = map_header->links.next;
11511 src_entry != (struct vm_map_entry *)&map_header->links;
11512 src_entry = new_entry) {
11513 new_entry = src_entry->vme_next;
11514 _vm_map_store_entry_unlink(map_header, src_entry);
11515 vm_object_deallocate(src_entry->object.vm_object);
11516 _vm_map_entry_dispose(map_header, src_entry);
11517 }
11518 }
11519 return result;
11520 }
11521
11522 /*
11523 * Routine: vm_map_remap
11524 *
11525 * Map a portion of a task's address space.
11526 * The mapped region must not overlap more than
11527 * one vm memory object. Protections and
11528 * inheritance attributes remain the same
11529 * as in the original task and are out parameters.
11530 * Source and target tasks can be identical.
11531 * Other attributes are the same as for vm_map().
11532 */
11533 kern_return_t
11534 vm_map_remap(
11535 vm_map_t target_map,
11536 vm_map_address_t *address,
11537 vm_map_size_t size,
11538 vm_map_offset_t mask,
11539 int flags,
11540 vm_map_t src_map,
11541 vm_map_offset_t memory_address,
11542 boolean_t copy,
11543 vm_prot_t *cur_protection,
11544 vm_prot_t *max_protection,
11545 vm_inherit_t inheritance)
11546 {
11547 kern_return_t result;
11548 vm_map_entry_t entry;
11549 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
11550 vm_map_entry_t new_entry;
11551 struct vm_map_header map_header;
11552
11553 if (target_map == VM_MAP_NULL)
11554 return KERN_INVALID_ARGUMENT;
11555
11556 switch (inheritance) {
11557 case VM_INHERIT_NONE:
11558 case VM_INHERIT_COPY:
11559 case VM_INHERIT_SHARE:
11560 if (size != 0 && src_map != VM_MAP_NULL)
11561 break;
11562 /*FALL THRU*/
11563 default:
11564 return KERN_INVALID_ARGUMENT;
11565 }
11566
11567 size = vm_map_round_page(size);
11568
11569 result = vm_map_remap_extract(src_map, memory_address,
11570 size, copy, &map_header,
11571 cur_protection,
11572 max_protection,
11573 inheritance,
11574 target_map->hdr.
11575 entries_pageable);
11576
11577 if (result != KERN_SUCCESS) {
11578 return result;
11579 }
11580
11581 /*
11582 * Allocate/check a range of free virtual address
11583 * space for the target
11584 */
11585 *address = vm_map_trunc_page(*address);
11586 vm_map_lock(target_map);
11587 result = vm_map_remap_range_allocate(target_map, address, size,
11588 mask, flags, &insp_entry);
11589
11590 for (entry = map_header.links.next;
11591 entry != (struct vm_map_entry *)&map_header.links;
11592 entry = new_entry) {
11593 new_entry = entry->vme_next;
11594 _vm_map_store_entry_unlink(&map_header, entry);
11595 if (result == KERN_SUCCESS) {
11596 entry->vme_start += *address;
11597 entry->vme_end += *address;
11598 vm_map_store_entry_link(target_map, insp_entry, entry);
11599 insp_entry = entry;
11600 } else {
11601 if (!entry->is_sub_map) {
11602 vm_object_deallocate(entry->object.vm_object);
11603 } else {
11604 vm_map_deallocate(entry->object.sub_map);
11605 }
11606 _vm_map_entry_dispose(&map_header, entry);
11607 }
11608 }
11609
11610 if( target_map->disable_vmentry_reuse == TRUE) {
11611 if( target_map->highest_entry_end < insp_entry->vme_end ){
11612 target_map->highest_entry_end = insp_entry->vme_end;
11613 }
11614 }
11615
11616 if (result == KERN_SUCCESS) {
11617 target_map->size += size;
11618 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
11619 }
11620 vm_map_unlock(target_map);
11621
11622 if (result == KERN_SUCCESS && target_map->wiring_required)
11623 result = vm_map_wire(target_map, *address,
11624 *address + size, *cur_protection, TRUE);
11625 return result;
11626 }
11627
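/*
 * Illustrative sketch, not part of the original XNU source: a hypothetical
 * caller (the "example_" name is made up) could map "size" bytes starting at
 * "src_addr" in "src_map" into "dst_map" at a kernel-chosen address, sharing
 * rather than copying the underlying memory.
 */
static kern_return_t
example_share_region(
        vm_map_t                dst_map,
        vm_map_t                src_map,
        vm_map_offset_t         src_addr,
        vm_map_size_t           size,
        vm_map_address_t        *dst_addr)      /* OUT */
{
        vm_prot_t       cur_prot;
        vm_prot_t       max_prot;

        *dst_addr = 0;
        return vm_map_remap(dst_map, dst_addr, size,
                            0,                  /* mask */
                            VM_FLAGS_ANYWHERE,
                            src_map, src_addr,
                            FALSE,              /* copy: share instead */
                            &cur_prot, &max_prot,
                            VM_INHERIT_SHARE);
}
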
11628 /*
11629 * Routine: vm_map_remap_range_allocate
11630 *
11631 * Description:
11632 * Allocate a range in the specified virtual address map.
11633 * Returns the address and the map entry just before the allocated
11634 * range.
11635 *
11636 * Map must be locked.
11637 */
11638
11639 static kern_return_t
11640 vm_map_remap_range_allocate(
11641 vm_map_t map,
11642 vm_map_address_t *address, /* IN/OUT */
11643 vm_map_size_t size,
11644 vm_map_offset_t mask,
11645 int flags,
11646 vm_map_entry_t *map_entry) /* OUT */
11647 {
11648 vm_map_entry_t entry;
11649 vm_map_offset_t start;
11650 vm_map_offset_t end;
11651 kern_return_t kr;
11652
11653 StartAgain: ;
11654
11655 start = *address;
11656
11657 if (flags & VM_FLAGS_ANYWHERE)
11658 {
11659 /*
11660 * Calculate the first possible address.
11661 */
11662
11663 if (start < map->min_offset)
11664 start = map->min_offset;
11665 if (start > map->max_offset)
11666 return(KERN_NO_SPACE);
11667
11668 /*
11669 * Look for the first possible address;
11670 * if there's already something at this
11671 * address, we have to start after it.
11672 */
11673
11674 if( map->disable_vmentry_reuse == TRUE) {
11675 VM_MAP_HIGHEST_ENTRY(map, entry, start);
11676 } else {
11677 assert(first_free_is_valid(map));
11678 if (start == map->min_offset) {
11679 if ((entry = map->first_free) != vm_map_to_entry(map))
11680 start = entry->vme_end;
11681 } else {
11682 vm_map_entry_t tmp_entry;
11683 if (vm_map_lookup_entry(map, start, &tmp_entry))
11684 start = tmp_entry->vme_end;
11685 entry = tmp_entry;
11686 }
11687 }
11688
11689 /*
11690 * In any case, the "entry" always precedes
11691 * the proposed new region throughout the
11692 * loop:
11693 */
11694
11695 while (TRUE) {
11696 register vm_map_entry_t next;
11697
11698 /*
11699 * Find the end of the proposed new region.
11700 * Be sure we didn't go beyond the end, or
11701 * wrap around the address.
11702 */
11703
11704 end = ((start + mask) & ~mask);
11705 if (end < start)
11706 return(KERN_NO_SPACE);
11707 start = end;
11708 end += size;
11709
11710 if ((end > map->max_offset) || (end < start)) {
11711 if (map->wait_for_space) {
11712 if (size <= (map->max_offset -
11713 map->min_offset)) {
11714 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
11715 vm_map_unlock(map);
11716 thread_block(THREAD_CONTINUE_NULL);
11717 vm_map_lock(map);
11718 goto StartAgain;
11719 }
11720 }
11721
11722 return(KERN_NO_SPACE);
11723 }
11724
11725 /*
11726 * If there are no more entries, we must win.
11727 */
11728
11729 next = entry->vme_next;
11730 if (next == vm_map_to_entry(map))
11731 break;
11732
11733 /*
11734 * If there is another entry, it must be
11735 * after the end of the potential new region.
11736 */
11737
11738 if (next->vme_start >= end)
11739 break;
11740
11741 /*
11742 * Didn't fit -- move to the next entry.
11743 */
11744
11745 entry = next;
11746 start = entry->vme_end;
11747 }
11748 *address = start;
11749 } else {
11750 vm_map_entry_t temp_entry;
11751
11752 /*
11753 * Verify that:
11754 * the address doesn't itself violate
11755 * the mask requirement.
11756 */
11757
11758 if ((start & mask) != 0)
11759 return(KERN_NO_SPACE);
11760
11761
11762 /*
11763 * ... the address is within bounds
11764 */
11765
11766 end = start + size;
11767
11768 if ((start < map->min_offset) ||
11769 (end > map->max_offset) ||
11770 (start >= end)) {
11771 return(KERN_INVALID_ADDRESS);
11772 }
11773
11774 /*
11775 * If we're asked to overwrite whatever was mapped in that
11776 * range, first deallocate that range.
11777 */
11778 if (flags & VM_FLAGS_OVERWRITE) {
11779 vm_map_t zap_map;
11780
11781 /*
11782 * We use a "zap_map" to avoid having to unlock
11783 * the "map" in vm_map_delete(), which would compromise
11784 * the atomicity of the "deallocate" and then "remap"
11785 * combination.
11786 */
11787 zap_map = vm_map_create(PMAP_NULL,
11788 start,
11789 end - start,
11790 map->hdr.entries_pageable);
11791 if (zap_map == VM_MAP_NULL) {
11792 return KERN_RESOURCE_SHORTAGE;
11793 }
11794
11795 kr = vm_map_delete(map, start, end,
11796 VM_MAP_REMOVE_SAVE_ENTRIES,
11797 zap_map);
11798 if (kr == KERN_SUCCESS) {
11799 vm_map_destroy(zap_map,
11800 VM_MAP_REMOVE_NO_PMAP_CLEANUP);
11801 zap_map = VM_MAP_NULL;
11802 }
11803 }
11804
11805 /*
11806 * ... the starting address isn't allocated
11807 */
11808
11809 if (vm_map_lookup_entry(map, start, &temp_entry))
11810 return(KERN_NO_SPACE);
11811
11812 entry = temp_entry;
11813
11814 /*
11815 * ... the next region doesn't overlap the
11816 * end point.
11817 */
11818
11819 if ((entry->vme_next != vm_map_to_entry(map)) &&
11820 (entry->vme_next->vme_start < end))
11821 return(KERN_NO_SPACE);
11822 }
11823 *map_entry = entry;
11824 return(KERN_SUCCESS);
11825 }
11826
11827 /*
11828 * vm_map_switch:
11829 *
11830 * Set the address map for the current thread to the specified map
11831 */
11832
11833 vm_map_t
11834 vm_map_switch(
11835 vm_map_t map)
11836 {
11837 int mycpu;
11838 thread_t thread = current_thread();
11839 vm_map_t oldmap = thread->map;
11840
11841 mp_disable_preemption();
11842 mycpu = cpu_number();
11843
11844 /*
11845 * Deactivate the current map and activate the requested map
11846 */
11847 PMAP_SWITCH_USER(thread, map, mycpu);
11848
11849 mp_enable_preemption();
11850 return(oldmap);
11851 }
11852
11853
11854 /*
11855 * Routine: vm_map_write_user
11856 *
11857 * Description:
11858 * Copy out data from a kernel space into space in the
11859 * destination map. The space must already exist in the
11860 * destination map.
11861 * NOTE: This routine should only be called by threads
11862 * which can block on a page fault, i.e. kernel-mode user
11863 * threads.
11864 *
11865 */
11866 kern_return_t
11867 vm_map_write_user(
11868 vm_map_t map,
11869 void *src_p,
11870 vm_map_address_t dst_addr,
11871 vm_size_t size)
11872 {
11873 kern_return_t kr = KERN_SUCCESS;
11874
11875 if(current_map() == map) {
11876 if (copyout(src_p, dst_addr, size)) {
11877 kr = KERN_INVALID_ADDRESS;
11878 }
11879 } else {
11880 vm_map_t oldmap;
11881
11882 /* take on the identity of the target map while doing */
11883 /* the transfer */
11884
11885 vm_map_reference(map);
11886 oldmap = vm_map_switch(map);
11887 if (copyout(src_p, dst_addr, size)) {
11888 kr = KERN_INVALID_ADDRESS;
11889 }
11890 vm_map_switch(oldmap);
11891 vm_map_deallocate(map);
11892 }
11893 return kr;
11894 }
11895
11896 /*
11897 * Routine: vm_map_read_user
11898 *
11899 * Description:
11900 * Copy in data from a user space source map into the
11901 * kernel map. The space must already exist in the
11902 * kernel map.
11903 * NOTE: This routine should only be called by threads
11904 * which can block on a page fault, i.e. kernel-mode user
11905 * threads.
11906 *
11907 */
11908 kern_return_t
11909 vm_map_read_user(
11910 vm_map_t map,
11911 vm_map_address_t src_addr,
11912 void *dst_p,
11913 vm_size_t size)
11914 {
11915 kern_return_t kr = KERN_SUCCESS;
11916
11917 if(current_map() == map) {
11918 if (copyin(src_addr, dst_p, size)) {
11919 kr = KERN_INVALID_ADDRESS;
11920 }
11921 } else {
11922 vm_map_t oldmap;
11923
11924 /* take on the identity of the target map while doing */
11925 /* the transfer */
11926
11927 vm_map_reference(map);
11928 oldmap = vm_map_switch(map);
11929 if (copyin(src_addr, dst_p, size)) {
11930 kr = KERN_INVALID_ADDRESS;
11931 }
11932 vm_map_switch(oldmap);
11933 vm_map_deallocate(map);
11934 }
11935 return kr;
11936 }
11937
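/*
 * Illustrative sketch, not part of the original XNU source: a hypothetical
 * round trip (the "example_" name is made up) through the two routines
 * above, writing a kernel value into a user mapping and reading it back.
 * The calling thread must be able to block on a page fault.
 */
static kern_return_t
example_user_round_trip(
        vm_map_t                user_map,
        vm_map_address_t        user_addr)
{
        int             out_value = 42;
        int             in_value = 0;
        kern_return_t   kr;

        kr = vm_map_write_user(user_map, &out_value, user_addr,
                               sizeof (out_value));
        if (kr != KERN_SUCCESS)
                return kr;

        return vm_map_read_user(user_map, user_addr, &in_value,
                                sizeof (in_value));
}
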
11938
11939 /*
11940 * vm_map_check_protection:
11941 *
11942 * Assert that the target map allows the specified
11943 * privilege on the entire address region given.
11944 * The entire region must be allocated.
11945 */
11946 boolean_t
11947 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
11948 vm_map_offset_t end, vm_prot_t protection)
11949 {
11950 vm_map_entry_t entry;
11951 vm_map_entry_t tmp_entry;
11952
11953 vm_map_lock(map);
11954
11955 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
11956 {
11957 vm_map_unlock(map);
11958 return (FALSE);
11959 }
11960
11961 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11962 vm_map_unlock(map);
11963 return(FALSE);
11964 }
11965
11966 entry = tmp_entry;
11967
11968 while (start < end) {
11969 if (entry == vm_map_to_entry(map)) {
11970 vm_map_unlock(map);
11971 return(FALSE);
11972 }
11973
11974 /*
11975 * No holes allowed!
11976 */
11977
11978 if (start < entry->vme_start) {
11979 vm_map_unlock(map);
11980 return(FALSE);
11981 }
11982
11983 /*
11984 * Check protection associated with entry.
11985 */
11986
11987 if ((entry->protection & protection) != protection) {
11988 vm_map_unlock(map);
11989 return(FALSE);
11990 }
11991
11992 /* go to next entry */
11993
11994 start = entry->vme_end;
11995 entry = entry->vme_next;
11996 }
11997 vm_map_unlock(map);
11998 return(TRUE);
11999 }
12000
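/*
 * Illustrative sketch, not part of the original XNU source: a hypothetical
 * caller (the "example_" name is made up) verifying that an entire,
 * page-rounded range is readable and writable before operating on it.
 */
static kern_return_t
example_verify_read_write(
        vm_map_t        map,
        vm_map_offset_t start,
        vm_map_offset_t end)
{
        if (!vm_map_check_protection(map,
                                     vm_map_trunc_page(start),
                                     vm_map_round_page(end),
                                     VM_PROT_READ | VM_PROT_WRITE))
                return KERN_PROTECTION_FAILURE;

        return KERN_SUCCESS;
}
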
12001 kern_return_t
12002 vm_map_purgable_control(
12003 vm_map_t map,
12004 vm_map_offset_t address,
12005 vm_purgable_t control,
12006 int *state)
12007 {
12008 vm_map_entry_t entry;
12009 vm_object_t object;
12010 kern_return_t kr;
12011
12012 /*
12013 * Vet all the input parameters and current type and state of the
12014 * underlying object. Return with an error if anything is amiss.
12015 */
12016 if (map == VM_MAP_NULL)
12017 return(KERN_INVALID_ARGUMENT);
12018
12019 if (control != VM_PURGABLE_SET_STATE &&
12020 control != VM_PURGABLE_GET_STATE &&
12021 control != VM_PURGABLE_PURGE_ALL)
12022 return(KERN_INVALID_ARGUMENT);
12023
12024 if (control == VM_PURGABLE_PURGE_ALL) {
12025 vm_purgeable_object_purge_all();
12026 return KERN_SUCCESS;
12027 }
12028
12029 if (control == VM_PURGABLE_SET_STATE &&
12030 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
12031 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
12032 return(KERN_INVALID_ARGUMENT);
12033
12034 vm_map_lock_read(map);
12035
12036 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
12037
12038 /*
12039 * Must pass a valid non-submap address.
12040 */
12041 vm_map_unlock_read(map);
12042 return(KERN_INVALID_ADDRESS);
12043 }
12044
12045 if ((entry->protection & VM_PROT_WRITE) == 0) {
12046 /*
12047 * Can't apply purgable controls to something you can't write.
12048 */
12049 vm_map_unlock_read(map);
12050 return(KERN_PROTECTION_FAILURE);
12051 }
12052
12053 object = entry->object.vm_object;
12054 if (object == VM_OBJECT_NULL) {
12055 /*
12056 * Object must already be present or it can't be purgable.
12057 */
12058 vm_map_unlock_read(map);
12059 return KERN_INVALID_ARGUMENT;
12060 }
12061
12062 vm_object_lock(object);
12063
12064 if (entry->offset != 0 ||
12065 entry->vme_end - entry->vme_start != object->vo_size) {
12066 /*
12067 * Can only apply purgable controls to the whole (existing)
12068 * object at once.
12069 */
12070 vm_map_unlock_read(map);
12071 vm_object_unlock(object);
12072 return KERN_INVALID_ARGUMENT;
12073 }
12074
12075 vm_map_unlock_read(map);
12076
12077 kr = vm_object_purgable_control(object, control, state);
12078
12079 vm_object_unlock(object);
12080
12081 return kr;
12082 }
12083
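/*
 * Illustrative sketch, not part of the original XNU source: a hypothetical
 * cache owner (the "example_" name is made up) marking the purgeable object
 * mapped at "addr" volatile so the VM may discard it under memory pressure,
 * and later checking whether it was emptied.
 */
static kern_return_t
example_make_volatile(
        vm_map_t        map,
        vm_map_offset_t addr,
        boolean_t       *was_purged)    /* OUT */
{
        int             state;
        kern_return_t   kr;

        state = VM_PURGABLE_VOLATILE;
        kr = vm_map_purgable_control(map, addr, VM_PURGABLE_SET_STATE, &state);
        if (kr != KERN_SUCCESS)
                return kr;

        /* ... some time later ... */
        kr = vm_map_purgable_control(map, addr, VM_PURGABLE_GET_STATE, &state);
        if (kr == KERN_SUCCESS)
                *was_purged = (state == VM_PURGABLE_EMPTY);
        return kr;
}
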
12084 kern_return_t
12085 vm_map_page_query_internal(
12086 vm_map_t target_map,
12087 vm_map_offset_t offset,
12088 int *disposition,
12089 int *ref_count)
12090 {
12091 kern_return_t kr;
12092 vm_page_info_basic_data_t info;
12093 mach_msg_type_number_t count;
12094
12095 count = VM_PAGE_INFO_BASIC_COUNT;
12096 kr = vm_map_page_info(target_map,
12097 offset,
12098 VM_PAGE_INFO_BASIC,
12099 (vm_page_info_t) &info,
12100 &count);
12101 if (kr == KERN_SUCCESS) {
12102 *disposition = info.disposition;
12103 *ref_count = info.ref_count;
12104 } else {
12105 *disposition = 0;
12106 *ref_count = 0;
12107 }
12108
12109 return kr;
12110 }
12111
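/*
 * Illustrative sketch, not part of the original XNU source: a hypothetical
 * helper (the "example_" name is made up) that reports whether the page
 * backing "addr" is currently resident, using the wrapper above.
 */
static boolean_t
example_page_is_resident(
        vm_map_t        map,
        vm_map_offset_t addr)
{
        int     disposition = 0;
        int     ref_count = 0;

        if (vm_map_page_query_internal(map, vm_map_trunc_page(addr),
                                       &disposition, &ref_count) != KERN_SUCCESS)
                return FALSE;

        return (disposition & VM_PAGE_QUERY_PAGE_PRESENT) ? TRUE : FALSE;
}
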
12112 kern_return_t
12113 vm_map_page_info(
12114 vm_map_t map,
12115 vm_map_offset_t offset,
12116 vm_page_info_flavor_t flavor,
12117 vm_page_info_t info,
12118 mach_msg_type_number_t *count)
12119 {
12120 vm_map_entry_t map_entry;
12121 vm_object_t object;
12122 vm_page_t m;
12123 kern_return_t kr;
12124 kern_return_t retval = KERN_SUCCESS;
12125 boolean_t top_object;
12126 int disposition;
12127 int ref_count;
12128 vm_object_id_t object_id;
12129 vm_page_info_basic_t basic_info;
12130 int depth;
12131 vm_map_offset_t offset_in_page;
12132
12133 switch (flavor) {
12134 case VM_PAGE_INFO_BASIC:
12135 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
12136 /*
12137 * The "vm_page_info_basic_data" structure was not
12138 * properly padded, so allow the size to be off by
12139 * one to maintain backwards binary compatibility...
12140 */
12141 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1)
12142 return KERN_INVALID_ARGUMENT;
12143 }
12144 break;
12145 default:
12146 return KERN_INVALID_ARGUMENT;
12147 }
12148
12149 disposition = 0;
12150 ref_count = 0;
12151 object_id = 0;
12152 top_object = TRUE;
12153 depth = 0;
12154
12155 retval = KERN_SUCCESS;
12156 offset_in_page = offset & PAGE_MASK;
12157 offset = vm_map_trunc_page(offset);
12158
12159 vm_map_lock_read(map);
12160
12161 /*
12162 * First, find the map entry covering "offset", going down
12163 * submaps if necessary.
12164 */
12165 for (;;) {
12166 if (!vm_map_lookup_entry(map, offset, &map_entry)) {
12167 vm_map_unlock_read(map);
12168 return KERN_INVALID_ADDRESS;
12169 }
12170 /* compute offset from this map entry's start */
12171 offset -= map_entry->vme_start;
12172 /* compute offset into this map entry's object (or submap) */
12173 offset += map_entry->offset;
12174
12175 if (map_entry->is_sub_map) {
12176 vm_map_t sub_map;
12177
12178 sub_map = map_entry->object.sub_map;
12179 vm_map_lock_read(sub_map);
12180 vm_map_unlock_read(map);
12181
12182 map = sub_map;
12183
12184 ref_count = MAX(ref_count, map->ref_count);
12185 continue;
12186 }
12187 break;
12188 }
12189
12190 object = map_entry->object.vm_object;
12191 if (object == VM_OBJECT_NULL) {
12192 /* no object -> no page */
12193 vm_map_unlock_read(map);
12194 goto done;
12195 }
12196
12197 vm_object_lock(object);
12198 vm_map_unlock_read(map);
12199
12200 /*
12201 * Go down the VM object shadow chain until we find the page
12202 * we're looking for.
12203 */
12204 for (;;) {
12205 ref_count = MAX(ref_count, object->ref_count);
12206
12207 m = vm_page_lookup(object, offset);
12208
12209 if (m != VM_PAGE_NULL) {
12210 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
12211 break;
12212 } else {
12213 #if MACH_PAGEMAP
12214 if (object->existence_map) {
12215 if (vm_external_state_get(object->existence_map,
12216 offset) ==
12217 VM_EXTERNAL_STATE_EXISTS) {
12218 /*
12219 * this page has been paged out
12220 */
12221 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
12222 break;
12223 }
12224 } else
12225 #endif
12226 {
12227 if (object->internal &&
12228 object->alive &&
12229 !object->terminating &&
12230 object->pager_ready) {
12231
12232 memory_object_t pager;
12233
12234 vm_object_paging_begin(object);
12235 pager = object->pager;
12236 vm_object_unlock(object);
12237
12238 /*
12239 * Ask the default pager if
12240 * it has this page.
12241 */
12242 kr = memory_object_data_request(
12243 pager,
12244 offset + object->paging_offset,
12245 0, /* just poke the pager */
12246 VM_PROT_READ,
12247 NULL);
12248
12249 vm_object_lock(object);
12250 vm_object_paging_end(object);
12251
12252 if (kr == KERN_SUCCESS) {
12253 /* the default pager has it */
12254 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
12255 break;
12256 }
12257 }
12258 }
12259
12260 if (object->shadow != VM_OBJECT_NULL) {
12261 vm_object_t shadow;
12262
12263 offset += object->vo_shadow_offset;
12264 shadow = object->shadow;
12265
12266 vm_object_lock(shadow);
12267 vm_object_unlock(object);
12268
12269 object = shadow;
12270 top_object = FALSE;
12271 depth++;
12272 } else {
12273 // if (!object->internal)
12274 // break;
12275 // retval = KERN_FAILURE;
12276 // goto done_with_object;
12277 break;
12278 }
12279 }
12280 }
12281 /* The ref_count is not strictly accurate: it measures the number */
12282 /* of entities holding a ref on the object, and they may not be */
12283 /* mapping the object or the section holding the target page. */
12284 /* It is still a ballpark number and, though an over-count, it */
12285 /* picks up the copy-on-write cases. */
12286
12287 /* We could also get a picture of page sharing from pmap_attributes */
12288 /* but this would undercount, as only faulted-in mappings would */
12289 /* show up. */
12290
12291 if (top_object == TRUE && object->shadow)
12292 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
12293
12294 if (! object->internal)
12295 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
12296
12297 if (m == VM_PAGE_NULL)
12298 goto done_with_object;
12299
12300 if (m->fictitious) {
12301 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
12302 goto done_with_object;
12303 }
12304 if (m->dirty || pmap_is_modified(m->phys_page))
12305 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
12306
12307 if (m->reference || pmap_is_referenced(m->phys_page))
12308 disposition |= VM_PAGE_QUERY_PAGE_REF;
12309
12310 if (m->speculative)
12311 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
12312
12313 if (m->cs_validated)
12314 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
12315 if (m->cs_tainted)
12316 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
12317
12318 done_with_object:
12319 vm_object_unlock(object);
12320 done:
12321
12322 switch (flavor) {
12323 case VM_PAGE_INFO_BASIC:
12324 basic_info = (vm_page_info_basic_t) info;
12325 basic_info->disposition = disposition;
12326 basic_info->ref_count = ref_count;
12327 basic_info->object_id = (vm_object_id_t) (uintptr_t) object;
12328 basic_info->offset =
12329 (memory_object_offset_t) offset + offset_in_page;
12330 basic_info->depth = depth;
12331 break;
12332 }
12333
12334 return retval;
12335 }
12336
12337 /*
12338 * vm_map_msync
12339 *
12340 * Synchronises the specified memory range with its backing store
12341 * image by either flushing or cleaning the contents to the appropriate
12342 * memory manager, engaging in a memory object synchronize dialog with
12343 * that manager. The client doesn't return until the manager issues
12344 * an m_o_s_completed message. MIG magically converts the user task
12345 * parameter to the task's address map.
12346 *
12347 * interpretation of sync_flags
12348 * VM_SYNC_INVALIDATE - discard pages, only return precious
12349 * pages to manager.
12350 *
12351 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
12352 * - discard pages, write dirty or precious
12353 * pages back to memory manager.
12354 *
12355 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
12356 * - write dirty or precious pages back to
12357 * the memory manager.
12358 *
12359 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
12360 * is a hole in the region, and we would
12361 * have returned KERN_SUCCESS, return
12362 * KERN_INVALID_ADDRESS instead.
12363 *
12364 * NOTE
12365 * The memory object attributes have not yet been implemented; this
12366 * function will have to deal with the invalidate attribute.
12367 *
12368 * RETURNS
12369 * KERN_INVALID_TASK Bad task parameter
12370 * KERN_INVALID_ARGUMENT both sync and async were specified.
12371 * KERN_SUCCESS The usual.
12372 * KERN_INVALID_ADDRESS There was a hole in the region.
12373 */
12374
12375 kern_return_t
12376 vm_map_msync(
12377 vm_map_t map,
12378 vm_map_address_t address,
12379 vm_map_size_t size,
12380 vm_sync_t sync_flags)
12381 {
12382 msync_req_t msr;
12383 msync_req_t new_msr;
12384 queue_chain_t req_q; /* queue of requests for this msync */
12385 vm_map_entry_t entry;
12386 vm_map_size_t amount_left;
12387 vm_object_offset_t offset;
12388 boolean_t do_sync_req;
12389 boolean_t had_hole = FALSE;
12390 memory_object_t pager;
12391
12392 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
12393 (sync_flags & VM_SYNC_SYNCHRONOUS))
12394 return(KERN_INVALID_ARGUMENT);
12395
12396 /*
12397 * align address and size on page boundaries
12398 */
12399 size = vm_map_round_page(address + size) - vm_map_trunc_page(address);
12400 address = vm_map_trunc_page(address);
12401
12402 if (map == VM_MAP_NULL)
12403 return(KERN_INVALID_TASK);
12404
12405 if (size == 0)
12406 return(KERN_SUCCESS);
12407
12408 queue_init(&req_q);
12409 amount_left = size;
12410
12411 while (amount_left > 0) {
12412 vm_object_size_t flush_size;
12413 vm_object_t object;
12414
12415 vm_map_lock(map);
12416 if (!vm_map_lookup_entry(map,
12417 vm_map_trunc_page(address), &entry)) {
12418
12419 vm_map_size_t skip;
12420
12421 /*
12422 * hole in the address map.
12423 */
12424 had_hole = TRUE;
12425
12426 /*
12427 * Check for empty map.
12428 */
12429 if (entry == vm_map_to_entry(map) &&
12430 entry->vme_next == entry) {
12431 vm_map_unlock(map);
12432 break;
12433 }
12434 /*
12435 * Check that we don't wrap and that
12436 * we have at least one real map entry.
12437 */
12438 if ((map->hdr.nentries == 0) ||
12439 (entry->vme_next->vme_start < address)) {
12440 vm_map_unlock(map);
12441 break;
12442 }
12443 /*
12444 * Move up to the next entry if needed
12445 */
12446 skip = (entry->vme_next->vme_start - address);
12447 if (skip >= amount_left)
12448 amount_left = 0;
12449 else
12450 amount_left -= skip;
12451 address = entry->vme_next->vme_start;
12452 vm_map_unlock(map);
12453 continue;
12454 }
12455
12456 offset = address - entry->vme_start;
12457
12458 /*
12459 * do we have more to flush than is contained in this
12460 * entry ?
12461 */
12462 if (amount_left + entry->vme_start + offset > entry->vme_end) {
12463 flush_size = entry->vme_end -
12464 (entry->vme_start + offset);
12465 } else {
12466 flush_size = amount_left;
12467 }
12468 amount_left -= flush_size;
12469 address += flush_size;
12470
12471 if (entry->is_sub_map == TRUE) {
12472 vm_map_t local_map;
12473 vm_map_offset_t local_offset;
12474
12475 local_map = entry->object.sub_map;
12476 local_offset = entry->offset;
12477 vm_map_unlock(map);
12478 if (vm_map_msync(
12479 local_map,
12480 local_offset,
12481 flush_size,
12482 sync_flags) == KERN_INVALID_ADDRESS) {
12483 had_hole = TRUE;
12484 }
12485 continue;
12486 }
12487 object = entry->object.vm_object;
12488
12489 /*
12490 * We can't sync this object if the object has not been
12491 * created yet
12492 */
12493 if (object == VM_OBJECT_NULL) {
12494 vm_map_unlock(map);
12495 continue;
12496 }
12497 offset += entry->offset;
12498
12499 vm_object_lock(object);
12500
12501 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
12502 int kill_pages = 0;
12503 boolean_t reusable_pages = FALSE;
12504
12505 if (sync_flags & VM_SYNC_KILLPAGES) {
12506 if (object->ref_count == 1 && !object->shadow)
12507 kill_pages = 1;
12508 else
12509 kill_pages = -1;
12510 }
12511 if (kill_pages != -1)
12512 vm_object_deactivate_pages(object, offset,
12513 (vm_object_size_t)flush_size, kill_pages, reusable_pages);
12514 vm_object_unlock(object);
12515 vm_map_unlock(map);
12516 continue;
12517 }
12518 /*
12519 * We can't sync this object if there isn't a pager.
12520 * Don't bother to sync internal objects, since there can't
12521 * be any "permanent" storage for these objects anyway.
12522 */
12523 if ((object->pager == MEMORY_OBJECT_NULL) ||
12524 (object->internal) || (object->private)) {
12525 vm_object_unlock(object);
12526 vm_map_unlock(map);
12527 continue;
12528 }
12529 /*
12530 * keep reference on the object until syncing is done
12531 */
12532 vm_object_reference_locked(object);
12533 vm_object_unlock(object);
12534
12535 vm_map_unlock(map);
12536
12537 do_sync_req = vm_object_sync(object,
12538 offset,
12539 flush_size,
12540 sync_flags & VM_SYNC_INVALIDATE,
12541 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
12542 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
12543 sync_flags & VM_SYNC_SYNCHRONOUS);
12544 /*
12545 * only send an m_o_s if we returned pages or if the entry
12546 * is writable (i.e. dirty pages may have already been sent back)
12547 */
12548 if (!do_sync_req) {
12549 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
12550 /*
12551 * clear out the clustering and read-ahead hints
12552 */
12553 vm_object_lock(object);
12554
12555 object->pages_created = 0;
12556 object->pages_used = 0;
12557 object->sequential = 0;
12558 object->last_alloc = 0;
12559
12560 vm_object_unlock(object);
12561 }
12562 vm_object_deallocate(object);
12563 continue;
12564 }
12565 msync_req_alloc(new_msr);
12566
12567 vm_object_lock(object);
12568 offset += object->paging_offset;
12569
12570 new_msr->offset = offset;
12571 new_msr->length = flush_size;
12572 new_msr->object = object;
12573 new_msr->flag = VM_MSYNC_SYNCHRONIZING;
12574 re_iterate:
12575
12576 /*
12577 * We can't sync this object if there isn't a pager. The
12578 * pager can disappear anytime we're not holding the object
12579 * lock. So this has to be checked anytime we goto re_iterate.
12580 */
12581
12582 pager = object->pager;
12583
12584 if (pager == MEMORY_OBJECT_NULL) {
12585 vm_object_unlock(object);
12586 vm_object_deallocate(object);
12587 continue;
12588 }
12589
12590 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
12591 /*
12592 * need to check for overlapping entry, if found, wait
12593 * on overlapping msr to be done, then reiterate
12594 */
12595 msr_lock(msr);
12596 if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
12597 ((offset >= msr->offset &&
12598 offset < (msr->offset + msr->length)) ||
12599 (msr->offset >= offset &&
12600 msr->offset < (offset + flush_size))))
12601 {
12602 assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
12603 msr_unlock(msr);
12604 vm_object_unlock(object);
12605 thread_block(THREAD_CONTINUE_NULL);
12606 vm_object_lock(object);
12607 goto re_iterate;
12608 }
12609 msr_unlock(msr);
12610 }/* queue_iterate */
12611
12612 queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
12613
12614 vm_object_paging_begin(object);
12615 vm_object_unlock(object);
12616
12617 queue_enter(&req_q, new_msr, msync_req_t, req_q);
12618
12619 (void) memory_object_synchronize(
12620 pager,
12621 offset,
12622 flush_size,
12623 sync_flags & ~VM_SYNC_CONTIGUOUS);
12624
12625 vm_object_lock(object);
12626 vm_object_paging_end(object);
12627 vm_object_unlock(object);
12628 }/* while */
12629
12630 /*
12631 * wait for memory_object_synchronize_completed messages from pager(s)
12632 */
12633
12634 while (!queue_empty(&req_q)) {
12635 msr = (msync_req_t)queue_first(&req_q);
12636 msr_lock(msr);
12637 while(msr->flag != VM_MSYNC_DONE) {
12638 assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
12639 msr_unlock(msr);
12640 thread_block(THREAD_CONTINUE_NULL);
12641 msr_lock(msr);
12642 }/* while */
12643 queue_remove(&req_q, msr, msync_req_t, req_q);
12644 msr_unlock(msr);
12645 vm_object_deallocate(msr->object);
12646 msync_req_free(msr);
12647 }/* while */
12648
12649 /* for proper msync() behaviour */
12650 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
12651 return(KERN_INVALID_ADDRESS);
12652
12653 return(KERN_SUCCESS);
12654 }/* vm_msync */
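
/*
 * Illustrative sketch, not part of the original sources: a synchronous
 * flush of a range using the sync_flags described above, treating any
 * hole in the region as an error.  The helper name is hypothetical.
 */
static __unused kern_return_t
vm_map_msync_flush_example(
	vm_map_t		map,
	vm_map_address_t	address,
	vm_map_size_t		size)
{
	/*
	 * Write dirty or precious pages back to the memory manager and
	 * wait for completion; KERN_INVALID_ADDRESS is returned if the
	 * range contains a hole.
	 */
	return vm_map_msync(map, address, size,
			    VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS);
}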
12655
12656 /*
12657 * Routine: convert_port_entry_to_map
12658 * Purpose:
12659 * Convert from a port specifying an entry or a task
12660 * to a map. Doesn't consume the port ref; produces a map ref,
12661 * which may be null. Unlike convert_port_to_map, the
12662 * port may be backed by a task or a named entry.
12663 * Conditions:
12664 * Nothing locked.
12665 */
12666
12667
12668 vm_map_t
12669 convert_port_entry_to_map(
12670 ipc_port_t port)
12671 {
12672 vm_map_t map;
12673 vm_named_entry_t named_entry;
12674 uint32_t try_failed_count = 0;
12675
12676 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
12677 while(TRUE) {
12678 ip_lock(port);
12679 if(ip_active(port) && (ip_kotype(port)
12680 == IKOT_NAMED_ENTRY)) {
12681 named_entry =
12682 (vm_named_entry_t)port->ip_kobject;
12683 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
12684 ip_unlock(port);
12685
12686 try_failed_count++;
12687 mutex_pause(try_failed_count);
12688 continue;
12689 }
12690 named_entry->ref_count++;
12691 lck_mtx_unlock(&(named_entry)->Lock);
12692 ip_unlock(port);
12693 if ((named_entry->is_sub_map) &&
12694 (named_entry->protection
12695 & VM_PROT_WRITE)) {
12696 map = named_entry->backing.map;
12697 } else {
12698 mach_destroy_memory_entry(port);
12699 return VM_MAP_NULL;
12700 }
12701 vm_map_reference_swap(map);
12702 mach_destroy_memory_entry(port);
12703 break;
12704 }
12705 else
12706 return VM_MAP_NULL;
12707 }
12708 }
12709 else
12710 map = convert_port_to_map(port);
12711
12712 return map;
12713 }
12714
12715 /*
12716 * Routine: convert_port_entry_to_object
12717 * Purpose:
12718 * Convert from a port specifying a named entry to an
12719 * object. Doesn't consume the port ref; produces an object ref,
12720 * which may be null.
12721 * Conditions:
12722 * Nothing locked.
12723 */
12724
12725
12726 vm_object_t
12727 convert_port_entry_to_object(
12728 ipc_port_t port)
12729 {
12730 vm_object_t object;
12731 vm_named_entry_t named_entry;
12732 uint32_t try_failed_count = 0;
12733
12734 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
12735 while(TRUE) {
12736 ip_lock(port);
12737 if(ip_active(port) && (ip_kotype(port)
12738 == IKOT_NAMED_ENTRY)) {
12739 named_entry =
12740 (vm_named_entry_t)port->ip_kobject;
12741 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
12742 ip_unlock(port);
12743
12744 try_failed_count++;
12745 mutex_pause(try_failed_count);
12746 continue;
12747 }
12748 named_entry->ref_count++;
12749 lck_mtx_unlock(&(named_entry)->Lock);
12750 ip_unlock(port);
12751 if ((!named_entry->is_sub_map) &&
12752 (!named_entry->is_pager) &&
12753 (named_entry->protection
12754 & VM_PROT_WRITE)) {
12755 object = named_entry->backing.object;
12756 } else {
12757 mach_destroy_memory_entry(port);
12758 return (vm_object_t)NULL;
12759 }
12760 vm_object_reference(named_entry->backing.object);
12761 mach_destroy_memory_entry(port);
12762 break;
12763 }
12764 else
12765 return (vm_object_t)NULL;
12766 }
12767 } else {
12768 return (vm_object_t)NULL;
12769 }
12770
12771 return object;
12772 }
12773
12774 /*
12775 * Export routines to other components for the things we access locally through
12776 * macros.
12777 */
12778 #undef current_map
12779 vm_map_t
12780 current_map(void)
12781 {
12782 return (current_map_fast());
12783 }
12784
12785 /*
12786 * vm_map_reference:
12787 *
12788 * Most code internal to the osfmk will go through a
12789 * macro defining this. This is always here for the
12790 * use of other kernel components.
12791 */
12792 #undef vm_map_reference
12793 void
12794 vm_map_reference(
12795 register vm_map_t map)
12796 {
12797 if (map == VM_MAP_NULL)
12798 return;
12799
12800 lck_mtx_lock(&map->s_lock);
12801 #if TASK_SWAPPER
12802 assert(map->res_count > 0);
12803 assert(map->ref_count >= map->res_count);
12804 map->res_count++;
12805 #endif
12806 map->ref_count++;
12807 lck_mtx_unlock(&map->s_lock);
12808 }
12809
12810 /*
12811 * vm_map_deallocate:
12812 *
12813 * Removes a reference from the specified map,
12814 * destroying it if no references remain.
12815 * The map should not be locked.
12816 */
12817 void
12818 vm_map_deallocate(
12819 register vm_map_t map)
12820 {
12821 unsigned int ref;
12822
12823 if (map == VM_MAP_NULL)
12824 return;
12825
12826 lck_mtx_lock(&map->s_lock);
12827 ref = --map->ref_count;
12828 if (ref > 0) {
12829 vm_map_res_deallocate(map);
12830 lck_mtx_unlock(&map->s_lock);
12831 return;
12832 }
12833 assert(map->ref_count == 0);
12834 lck_mtx_unlock(&map->s_lock);
12835
12836 #if TASK_SWAPPER
12837 /*
12838 * The map residence count isn't decremented here because
12839 * the vm_map_delete below will traverse the entire map,
12840 * deleting entries, and the residence counts on objects
12841 * and sharing maps will go away then.
12842 */
12843 #endif
12844
12845 vm_map_destroy(map, VM_MAP_NO_FLAGS);
12846 }
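
/*
 * Illustrative sketch, not part of the original sources: the expected
 * pairing of vm_map_reference() and vm_map_deallocate() when another
 * kernel component needs to keep a map alive across some operation.
 * The helper and its operate_on_map() callback are hypothetical.
 */
static __unused void
vm_map_hold_example(
	vm_map_t	map,
	void		(*operate_on_map)(vm_map_t))
{
	if (map == VM_MAP_NULL)
		return;

	vm_map_reference(map);		/* take an extra reference */
	operate_on_map(map);		/* caller-supplied work */
	vm_map_deallocate(map);		/* drop it; may destroy the map */
}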
12847
12848
12849 void
12850 vm_map_disable_NX(vm_map_t map)
12851 {
12852 if (map == NULL)
12853 return;
12854 if (map->pmap == NULL)
12855 return;
12856
12857 pmap_disable_NX(map->pmap);
12858 }
12859
12860 void
12861 vm_map_disallow_data_exec(vm_map_t map)
12862 {
12863 if (map == NULL)
12864 return;
12865
12866 map->map_disallow_data_exec = TRUE;
12867 }
12868
12869 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
12870 * more descriptive.
12871 */
12872 void
12873 vm_map_set_32bit(vm_map_t map)
12874 {
12875 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
12876 }
12877
12878
12879 void
12880 vm_map_set_64bit(vm_map_t map)
12881 {
12882 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
12883 }
12884
12885 vm_map_offset_t
12886 vm_compute_max_offset(unsigned is64)
12887 {
12888 return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
12889 }
12890
12891 boolean_t
12892 vm_map_is_64bit(
12893 vm_map_t map)
12894 {
12895 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
12896 }
12897
12898 boolean_t
12899 vm_map_has_4GB_pagezero(
12900 vm_map_t map)
12901 {
12902 /*
12903 * XXX FBDP
12904 * We should lock the VM map (for read) here but we can get away
12905 * with it for now because there can't really be any race condition:
12906 * the VM map's min_offset is changed only when the VM map is created
12907 * and when the zero page is established (when the binary gets loaded),
12908 * and this routine gets called only when the task terminates and the
12909 * VM map is being torn down, and when a new map is created via
12910 * load_machfile()/execve().
12911 */
12912 return (map->min_offset >= 0x100000000ULL);
12913 }
12914
12915 void
12916 vm_map_set_4GB_pagezero(vm_map_t map)
12917 {
12918 #if defined(__i386__)
12919 pmap_set_4GB_pagezero(map->pmap);
12920 #else
12921 #pragma unused(map)
12922 #endif
12923
12924 }
12925
12926 void
12927 vm_map_clear_4GB_pagezero(vm_map_t map)
12928 {
12929 #if defined(__i386__)
12930 pmap_clear_4GB_pagezero(map->pmap);
12931 #else
12932 #pragma unused(map)
12933 #endif
12934 }
12935
12936 /*
12937 * Raise a VM map's minimum offset.
12938 * To strictly enforce "page zero" reservation.
12939 */
12940 kern_return_t
12941 vm_map_raise_min_offset(
12942 vm_map_t map,
12943 vm_map_offset_t new_min_offset)
12944 {
12945 vm_map_entry_t first_entry;
12946
12947 new_min_offset = vm_map_round_page(new_min_offset);
12948
12949 vm_map_lock(map);
12950
12951 if (new_min_offset < map->min_offset) {
12952 /*
12953 * Can't move min_offset backwards, as that would expose
12954 * a part of the address space that was previously, and for
12955 * possibly good reasons, inaccessible.
12956 */
12957 vm_map_unlock(map);
12958 return KERN_INVALID_ADDRESS;
12959 }
12960
12961 first_entry = vm_map_first_entry(map);
12962 if (first_entry != vm_map_to_entry(map) &&
12963 first_entry->vme_start < new_min_offset) {
12964 /*
12965 * Some memory was already allocated below the new
12966 * minimum offset. It's too late to change it now...
12967 */
12968 vm_map_unlock(map);
12969 return KERN_NO_SPACE;
12970 }
12971
12972 map->min_offset = new_min_offset;
12973
12974 vm_map_unlock(map);
12975
12976 return KERN_SUCCESS;
12977 }
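
/*
 * Illustrative sketch, not part of the original sources: reserving a
 * 4GB "page zero" by raising the map's minimum offset to the same
 * boundary tested by vm_map_has_4GB_pagezero() above.  The helper name
 * is hypothetical.
 */
static __unused kern_return_t
vm_map_reserve_4GB_pagezero_example(
	vm_map_t	map)
{
	return vm_map_raise_min_offset(map,
				       (vm_map_offset_t)0x100000000ULL);
}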
12978
12979 /*
12980 * Set the limit on the maximum amount of user wired memory allowed for this map.
12981 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
12982 * the kernel. The limits are checked in the mach VM side, so we keep a copy so we
12983 * don't have to reach over to the BSD data structures.
12984 */
12985
12986 void
12987 vm_map_set_user_wire_limit(vm_map_t map,
12988 vm_size_t limit)
12989 {
12990 map->user_wire_limit = limit;
12991 }
12992
12993
12994 void vm_map_switch_protect(vm_map_t map,
12995 boolean_t val)
12996 {
12997 vm_map_lock(map);
12998 map->switch_protect=val;
12999 vm_map_unlock(map);
13000 }
13001
13002 /* Add (generate) code signature for memory range */
13003 #if CONFIG_DYNAMIC_CODE_SIGNING
13004 kern_return_t vm_map_sign(vm_map_t map,
13005 vm_map_offset_t start,
13006 vm_map_offset_t end)
13007 {
13008 vm_map_entry_t entry;
13009 vm_page_t m;
13010 vm_object_t object;
13011
13012 /*
13013 * Vet all the input parameters and current type and state of the
13014 * underlying object. Return with an error if anything is amiss.
13015 */
13016 if (map == VM_MAP_NULL)
13017 return(KERN_INVALID_ARGUMENT);
13018
13019 vm_map_lock_read(map);
13020
13021 if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
13022 /*
13023 * Must pass a valid non-submap address.
13024 */
13025 vm_map_unlock_read(map);
13026 return(KERN_INVALID_ADDRESS);
13027 }
13028
13029 if((entry->vme_start > start) || (entry->vme_end < end)) {
13030 /*
13031 * Map entry doesn't cover the requested range. Not handling
13032 * this situation currently.
13033 */
13034 vm_map_unlock_read(map);
13035 return(KERN_INVALID_ARGUMENT);
13036 }
13037
13038 object = entry->object.vm_object;
13039 if (object == VM_OBJECT_NULL) {
13040 /*
13041 * Object must already be present or we can't sign.
13042 */
13043 vm_map_unlock_read(map);
13044 return KERN_INVALID_ARGUMENT;
13045 }
13046
13047 vm_object_lock(object);
13048 vm_map_unlock_read(map);
13049
13050 while(start < end) {
13051 uint32_t refmod;
13052
13053 m = vm_page_lookup(object, start - entry->vme_start + entry->offset );
13054 if (m==VM_PAGE_NULL) {
13055 /* should we try to fault a page here? we can probably
13056 * demand it exists and is locked for this request */
13057 vm_object_unlock(object);
13058 return KERN_FAILURE;
13059 }
13060 /* deal with special page status */
13061 if (m->busy ||
13062 (m->unusual && (m->error || m->restart || m->private || m->absent))) {
13063 vm_object_unlock(object);
13064 return KERN_FAILURE;
13065 }
13066
13067 /* Page is OK... now "validate" it */
13068 /* This is the place where we'll call out to create a code
13069 * directory, later */
13070 m->cs_validated = TRUE;
13071
13072 /* The page is now "clean" for codesigning purposes. That means
13073 * we don't consider it as modified (wpmapped) anymore. But
13074 * we'll disconnect the page so we note any future modification
13075 * attempts. */
13076 m->wpmapped = FALSE;
13077 refmod = pmap_disconnect(m->phys_page);
13078
13079 /* Pull the dirty status from the pmap, since we cleared the
13080 * wpmapped bit */
13081 if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
13082 m->dirty = TRUE;
13083 }
13084
13085 /* On to the next page */
13086 start += PAGE_SIZE;
13087 }
13088 vm_object_unlock(object);
13089
13090 return KERN_SUCCESS;
13091 }
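
/*
 * Illustrative sketch, not part of the original sources: signing one
 * page worth of dynamically generated code with vm_map_sign().  The
 * helper name is hypothetical; the page must already be resident and
 * covered by a single non-submap entry, as checked above.
 */
static __unused kern_return_t
vm_map_sign_one_page_example(
	vm_map_t	map,
	vm_map_offset_t	addr)
{
	vm_map_offset_t	start, end;

	start = vm_map_trunc_page(addr);
	end = start + PAGE_SIZE;

	return vm_map_sign(map, start, end);
}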
13092 #endif
13093
13094 #if CONFIG_FREEZE
13095
13096 kern_return_t vm_map_freeze_walk(
13097 vm_map_t map,
13098 unsigned int *purgeable_count,
13099 unsigned int *wired_count,
13100 unsigned int *clean_count,
13101 unsigned int *dirty_count,
13102 boolean_t *has_shared)
13103 {
13104 vm_map_entry_t entry;
13105
13106 vm_map_lock_read(map);
13107
13108 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
13109 *has_shared = FALSE;
13110
13111 for (entry = vm_map_first_entry(map);
13112 entry != vm_map_to_entry(map);
13113 entry = entry->vme_next) {
13114 unsigned int purgeable, clean, dirty, wired;
13115 boolean_t shared;
13116
13117 if ((entry->object.vm_object == 0) ||
13118 (entry->is_sub_map) ||
13119 (entry->object.vm_object->phys_contiguous)) {
13120 continue;
13121 }
13122
13123 vm_object_pack(&purgeable, &wired, &clean, &dirty, &shared, entry->object.vm_object, VM_OBJECT_NULL, NULL, NULL);
13124
13125 *purgeable_count += purgeable;
13126 *wired_count += wired;
13127 *clean_count += clean;
13128 *dirty_count += dirty;
13129
13130 if (shared) {
13131 *has_shared = TRUE;
13132 }
13133 }
13134
13135 vm_map_unlock_read(map);
13136
13137 return KERN_SUCCESS;
13138 }
13139
13140 kern_return_t vm_map_freeze(
13141 vm_map_t map,
13142 unsigned int *purgeable_count,
13143 unsigned int *wired_count,
13144 unsigned int *clean_count,
13145 unsigned int *dirty_count,
13146 boolean_t *has_shared)
13147 {
13148 vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
13149 vm_object_t compact_object = VM_OBJECT_NULL;
13150 vm_object_offset_t offset = 0x0;
13151 kern_return_t kr = KERN_SUCCESS;
13152 void *default_freezer_toc = NULL;
13153 boolean_t cleanup = FALSE;
13154
13155 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
13156 *has_shared = FALSE;
13157
13158 /* Create our compact object */
13159 compact_object = vm_object_allocate((vm_map_offset_t)(VM_MAX_ADDRESS) - (vm_map_offset_t)(VM_MIN_ADDRESS));
13160 if (!compact_object) {
13161 kr = KERN_FAILURE;
13162 goto done;
13163 }
13164
13165 default_freezer_toc = default_freezer_mapping_create(compact_object, offset);
13166 if (!default_freezer_toc) {
13167 kr = KERN_FAILURE;
13168 goto done;
13169 }
13170
13171 /*
13172 * We need the exclusive lock here so that we can
13173 * block any page faults or lookups while we are
13174 * in the middle of freezing this vm map.
13175 */
13176 vm_map_lock(map);
13177
13178 if (map->default_freezer_toc != NULL){
13179 /*
13180 * This map has already been frozen.
13181 */
13182 cleanup = TRUE;
13183 kr = KERN_SUCCESS;
13184 goto done;
13185 }
13186
13187 /* Get a mapping in place for the freezing about to commence */
13188 map->default_freezer_toc = default_freezer_toc;
13189
13190 vm_object_lock(compact_object);
13191
13192 for (entry2 = vm_map_first_entry(map);
13193 entry2 != vm_map_to_entry(map);
13194 entry2 = entry2->vme_next) {
13195
13196 vm_object_t src_object = entry2->object.vm_object;
13197
13198 /* If eligible, scan the entry, moving eligible pages over to our parent object */
13199 if (entry2->object.vm_object && !entry2->is_sub_map && !entry2->object.vm_object->phys_contiguous) {
13200 unsigned int purgeable, clean, dirty, wired;
13201 boolean_t shared;
13202
13203 vm_object_pack(&purgeable, &wired, &clean, &dirty, &shared,
13204 src_object, compact_object, &default_freezer_toc, &offset);
13205
13206 *purgeable_count += purgeable;
13207 *wired_count += wired;
13208 *clean_count += clean;
13209 *dirty_count += dirty;
13210
13211 if (shared) {
13212 *has_shared = TRUE;
13213 }
13214 }
13215 }
13216
13217 vm_object_unlock(compact_object);
13218
13219 /* Finally, throw out the pages to swap */
13220 vm_object_pageout(compact_object);
13221
13222 done:
13223 vm_map_unlock(map);
13224
13225 /* Unwind if there was a failure */
13226 if ((cleanup) || (KERN_SUCCESS != kr)) {
13227 if (default_freezer_toc){
13228 default_freezer_mapping_free(&map->default_freezer_toc, TRUE);
13229 }
13230 if (compact_object){
13231 vm_object_deallocate(compact_object);
13232 }
13233 }
13234
13235 return kr;
13236 }
13237
13238 __private_extern__ vm_object_t default_freezer_get_compact_vm_object( void** );
13239
13240 void
13241 vm_map_thaw(
13242 vm_map_t map)
13243 {
13244 void **default_freezer_toc;
13245 vm_object_t compact_object;
13246
13247 vm_map_lock(map);
13248
13249 if (map->default_freezer_toc == NULL){
13250 /*
13251 * This map is not in a frozen state.
13252 */
13253 goto out;
13254 }
13255
13256 default_freezer_toc = &(map->default_freezer_toc);
13257
13258 compact_object = default_freezer_get_compact_vm_object(default_freezer_toc);
13259
13260 /* Bring the pages back in */
13261 vm_object_pagein(compact_object);
13262
13263 /* Shift pages back to their original objects */
13264 vm_object_unpack(compact_object, default_freezer_toc);
13265
13266 vm_object_deallocate(compact_object);
13267
13268 map->default_freezer_toc = NULL;
13269
13270 out:
13271 vm_map_unlock(map);
13272 }
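
/*
 * Illustrative sketch, not part of the original sources: a possible
 * freeze/thaw sequence using the routines above.  The helper name and
 * the choice to walk the map first are hypothetical.
 */
static __unused kern_return_t
vm_map_freeze_example(
	vm_map_t	map)
{
	unsigned int	purgeable, wired, clean, dirty;
	boolean_t	shared;
	kern_return_t	kr;

	/* estimate what freezing would find, without modifying the map */
	kr = vm_map_freeze_walk(map, &purgeable, &wired,
				&clean, &dirty, &shared);
	if (kr != KERN_SUCCESS)
		return kr;

	/* pack eligible pages into the compact object and page them out */
	kr = vm_map_freeze(map, &purgeable, &wired,
			   &clean, &dirty, &shared);
	if (kr != KERN_SUCCESS)
		return kr;

	/* ...later, bring everything back before the task runs again */
	vm_map_thaw(map);

	return KERN_SUCCESS;
}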
13273 #endif
13274
13275 #if !CONFIG_EMBEDDED
13276 /*
13277 * vm_map_entry_should_cow_for_true_share:
13278 *
13279 * Determines if the map entry should be clipped and set up for copy-on-write
13280 * to avoid applying "true_share" to a large VM object when only a subset is
13281 * targeted.
13282 *
13283 * For now, we target only the map entries created for the Objective C
13284 * Garbage Collector, which initially have the following properties:
13285 * - alias == VM_MEMORY_MALLOC
13286 * - wired_count == 0
13287 * - !needs_copy
13288 * and a VM object with:
13289 * - internal
13290 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
13291 * - !true_share
13292 * - vo_size == ANON_CHUNK_SIZE
13293 */
13294 boolean_t
13295 vm_map_entry_should_cow_for_true_share(
13296 vm_map_entry_t entry)
13297 {
13298 vm_object_t object;
13299
13300 if (entry->is_sub_map) {
13301 /* entry does not point at a VM object */
13302 return FALSE;
13303 }
13304
13305 if (entry->needs_copy) {
13306 /* already set for copy_on_write: done! */
13307 return FALSE;
13308 }
13309
13310 if (entry->alias != VM_MEMORY_MALLOC) {
13311 /* not tagged as an Objective C Garbage Collector entry */
13312 return FALSE;
13313 }
13314
13315 if (entry->wired_count) {
13316 /* wired: can't change the map entry... */
13317 return FALSE;
13318 }
13319
13320 object = entry->object.vm_object;
13321
13322 if (object == VM_OBJECT_NULL) {
13323 /* no object yet... */
13324 return FALSE;
13325 }
13326
13327 if (!object->internal) {
13328 /* not an internal object */
13329 return FALSE;
13330 }
13331
13332 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
13333 /* not the default copy strategy */
13334 return FALSE;
13335 }
13336
13337 if (object->true_share) {
13338 /* already true_share: too late to avoid it */
13339 return FALSE;
13340 }
13341
13342 if (object->vo_size != ANON_CHUNK_SIZE) {
13343 /* not an object created for the ObjC Garbage Collector */
13344 return FALSE;
13345 }
13346
13347 /*
13348 * All the criteria match: we have a large object being targeted for "true_share".
13349 * To limit the adverse side-effects linked with "true_share", tell the caller to
13350 * try and avoid setting up the entire object for "true_share" by clipping the
13351 * targeted range and setting it up for copy-on-write.
13352 */
13353 return TRUE;
13354 }
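
/*
 * Illustrative sketch, not part of the original sources: how a caller
 * that already holds the map write-locked might act on the check above,
 * clipping the entry to the targeted range so that only that range is
 * set up for copy-on-write.  The helper name is hypothetical and the
 * sketch is only an outline: a complete implementation would also have
 * to write-protect any existing physical mappings of the clipped range.
 */
static __unused void
vm_map_entry_cow_for_true_share_example(
	vm_map_t	map,
	vm_map_entry_t	entry,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	if (!vm_map_entry_should_cow_for_true_share(entry))
		return;

	/* restrict the entry to the pages actually being targeted */
	vm_map_clip_start(map, entry, vm_map_trunc_page(start));
	vm_map_clip_end(map, entry, vm_map_round_page(end));

	/* defer the actual copy to the first write fault */
	entry->needs_copy = TRUE;
}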
13355 #endif /* !CONFIG_EMBEDDED */