1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66 #include <task_swapper.h>
67 #include <mach_assert.h>
68 #include <libkern/OSAtomic.h>
69
70 #include <mach/kern_return.h>
71 #include <mach/port.h>
72 #include <mach/vm_attributes.h>
73 #include <mach/vm_param.h>
74 #include <mach/vm_behavior.h>
75 #include <mach/vm_statistics.h>
76 #include <mach/memory_object.h>
77 #include <mach/mach_vm.h>
78 #include <machine/cpu_capabilities.h>
79 #include <mach/sdt.h>
80
81 #include <kern/assert.h>
82 #include <kern/counters.h>
83 #include <kern/kalloc.h>
84 #include <kern/zalloc.h>
85
86 #include <vm/cpm.h>
87 #include <vm/vm_init.h>
88 #include <vm/vm_fault.h>
89 #include <vm/vm_map.h>
90 #include <vm/vm_object.h>
91 #include <vm/vm_page.h>
92 #include <vm/vm_pageout.h>
93 #include <vm/vm_kern.h>
94 #include <ipc/ipc_port.h>
95 #include <kern/sched_prim.h>
96 #include <kern/misc_protos.h>
97 #include <machine/db_machdep.h>
98 #include <kern/xpr.h>
99
100 #include <mach/vm_map_server.h>
101 #include <mach/mach_host_server.h>
102 #include <vm/vm_protos.h>
103 #include <vm/vm_purgeable_internal.h>
104
105 #ifdef ppc
106 #include <ppc/mappings.h>
107 #endif /* ppc */
108
109 #include <vm/vm_protos.h>
110 #include <vm/vm_shared_region.h>
111
112 /* Internal prototypes
113 */
114
115 static void vm_map_simplify_range(
116 vm_map_t map,
117 vm_map_offset_t start,
118 vm_map_offset_t end); /* forward */
119
120 static boolean_t vm_map_range_check(
121 vm_map_t map,
122 vm_map_offset_t start,
123 vm_map_offset_t end,
124 vm_map_entry_t *entry);
125
126 static vm_map_entry_t _vm_map_entry_create(
127 struct vm_map_header *map_header);
128
129 static void _vm_map_entry_dispose(
130 struct vm_map_header *map_header,
131 vm_map_entry_t entry);
132
133 static void vm_map_pmap_enter(
134 vm_map_t map,
135 vm_map_offset_t addr,
136 vm_map_offset_t end_addr,
137 vm_object_t object,
138 vm_object_offset_t offset,
139 vm_prot_t protection);
140
141 static void _vm_map_clip_end(
142 struct vm_map_header *map_header,
143 vm_map_entry_t entry,
144 vm_map_offset_t end);
145
146 static void _vm_map_clip_start(
147 struct vm_map_header *map_header,
148 vm_map_entry_t entry,
149 vm_map_offset_t start);
150
151 static void vm_map_entry_delete(
152 vm_map_t map,
153 vm_map_entry_t entry);
154
155 static kern_return_t vm_map_delete(
156 vm_map_t map,
157 vm_map_offset_t start,
158 vm_map_offset_t end,
159 int flags,
160 vm_map_t zap_map);
161
162 static kern_return_t vm_map_copy_overwrite_unaligned(
163 vm_map_t dst_map,
164 vm_map_entry_t entry,
165 vm_map_copy_t copy,
166 vm_map_address_t start);
167
168 static kern_return_t vm_map_copy_overwrite_aligned(
169 vm_map_t dst_map,
170 vm_map_entry_t tmp_entry,
171 vm_map_copy_t copy,
172 vm_map_offset_t start,
173 pmap_t pmap);
174
175 static kern_return_t vm_map_copyin_kernel_buffer(
176 vm_map_t src_map,
177 vm_map_address_t src_addr,
178 vm_map_size_t len,
179 boolean_t src_destroy,
180 vm_map_copy_t *copy_result); /* OUT */
181
182 static kern_return_t vm_map_copyout_kernel_buffer(
183 vm_map_t map,
184 vm_map_address_t *addr, /* IN/OUT */
185 vm_map_copy_t copy,
186 boolean_t overwrite);
187
188 static void vm_map_fork_share(
189 vm_map_t old_map,
190 vm_map_entry_t old_entry,
191 vm_map_t new_map);
192
193 static boolean_t vm_map_fork_copy(
194 vm_map_t old_map,
195 vm_map_entry_t *old_entry_p,
196 vm_map_t new_map);
197
198 void vm_map_region_top_walk(
199 vm_map_entry_t entry,
200 vm_region_top_info_t top);
201
202 void vm_map_region_walk(
203 vm_map_t map,
204 vm_map_offset_t va,
205 vm_map_entry_t entry,
206 vm_object_offset_t offset,
207 vm_object_size_t range,
208 vm_region_extended_info_t extended,
209 boolean_t look_for_pages);
210
211 static kern_return_t vm_map_wire_nested(
212 vm_map_t map,
213 vm_map_offset_t start,
214 vm_map_offset_t end,
215 vm_prot_t access_type,
216 boolean_t user_wire,
217 pmap_t map_pmap,
218 vm_map_offset_t pmap_addr);
219
220 static kern_return_t vm_map_unwire_nested(
221 vm_map_t map,
222 vm_map_offset_t start,
223 vm_map_offset_t end,
224 boolean_t user_wire,
225 pmap_t map_pmap,
226 vm_map_offset_t pmap_addr);
227
228 static kern_return_t vm_map_overwrite_submap_recurse(
229 vm_map_t dst_map,
230 vm_map_offset_t dst_addr,
231 vm_map_size_t dst_size);
232
233 static kern_return_t vm_map_copy_overwrite_nested(
234 vm_map_t dst_map,
235 vm_map_offset_t dst_addr,
236 vm_map_copy_t copy,
237 boolean_t interruptible,
238 pmap_t pmap);
239
240 static kern_return_t vm_map_remap_extract(
241 vm_map_t map,
242 vm_map_offset_t addr,
243 vm_map_size_t size,
244 boolean_t copy,
245 struct vm_map_header *map_header,
246 vm_prot_t *cur_protection,
247 vm_prot_t *max_protection,
248 vm_inherit_t inheritance,
249 boolean_t pageable);
250
251 static kern_return_t vm_map_remap_range_allocate(
252 vm_map_t map,
253 vm_map_address_t *address,
254 vm_map_size_t size,
255 vm_map_offset_t mask,
256 boolean_t anywhere,
257 vm_map_entry_t *map_entry);
258
259 static void vm_map_region_look_for_page(
260 vm_map_t map,
261 vm_map_offset_t va,
262 vm_object_t object,
263 vm_object_offset_t offset,
264 int max_refcnt,
265 int depth,
266 vm_region_extended_info_t extended);
267
268 static int vm_map_region_count_obj_refs(
269 vm_map_entry_t entry,
270 vm_object_t object);
271
272
273 static kern_return_t vm_map_willneed(
274 vm_map_t map,
275 vm_map_offset_t start,
276 vm_map_offset_t end);
277
278 static kern_return_t vm_map_reuse_pages(
279 vm_map_t map,
280 vm_map_offset_t start,
281 vm_map_offset_t end);
282
283 static kern_return_t vm_map_reusable_pages(
284 vm_map_t map,
285 vm_map_offset_t start,
286 vm_map_offset_t end);
287
288 static kern_return_t vm_map_can_reuse(
289 vm_map_t map,
290 vm_map_offset_t start,
291 vm_map_offset_t end);
292
293 /*
294 * Macros to copy a vm_map_entry. We must be careful to correctly
295 * manage the wired page count. vm_map_entry_copy() creates a new
296 * map entry to the same memory - the wired count in the new entry
297 * must be set to zero. vm_map_entry_copy_full() creates a new
298 * entry that is identical to the old entry. This preserves the
299 * wire count; it's used for map splitting and zone changing in
300 * vm_map_copyout.
301 */
302 #define vm_map_entry_copy(NEW,OLD) \
303 MACRO_BEGIN \
304 *(NEW) = *(OLD); \
305 (NEW)->is_shared = FALSE; \
306 (NEW)->needs_wakeup = FALSE; \
307 (NEW)->in_transition = FALSE; \
308 (NEW)->wired_count = 0; \
309 (NEW)->user_wired_count = 0; \
310 (NEW)->permanent = FALSE; \
311 MACRO_END
312
313 #define vm_map_entry_copy_full(NEW,OLD) (*(NEW) = *(OLD))
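
/*
 * Illustrative sketch (editor's addition, not part of the original source):
 * the difference between the two copy macros matters when duplicating a
 * wired entry.  Assuming hypothetical locals "old" and "new":
 *
 *	vm_map_entry_copy(new, old);		new->wired_count == 0, so the
 *						wired pages remain accounted
 *						to "old" only.
 *	vm_map_entry_copy_full(new, old);	new->wired_count equals
 *						old->wired_count; used when
 *						"new" simply replaces "old"
 *						(e.g. the zone change in
 *						vm_map_copyout).
 */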
314
315 /*
316 * Decide if we want to allow processes to execute from their data or stack areas.
317 * override_nx() returns true if we do. Data/stack execution can be enabled independently
318 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
319 * or allow_stack_exec to enable data execution for that type of data area for that particular
320 * ABI (or both by or'ing the flags together). These are initialized in the architecture
321 * specific pmap files since the default behavior varies according to architecture. The
322 * main reason it varies is because of the need to provide binary compatibility with old
323 * applications that were written before these restrictions came into being. In the old
324 * days, an app could execute anything it could read, but this has slowly been tightened
325 * up over time. The default behavior is:
326 *
327 * 32-bit PPC apps may execute from both stack and data areas
328  * 32-bit Intel apps may execute from data areas but not stack
329 * 64-bit PPC/Intel apps may not execute from either data or stack
330 *
331 * An application on any architecture may override these defaults by explicitly
332 * adding PROT_EXEC permission to the page in question with the mprotect(2)
333 * system call. This code here just determines what happens when an app tries to
334 * execute from a page that lacks execute permission.
335 *
336 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
337 * default behavior for both 32 and 64 bit apps on a system-wide basis.
338 */
339
340 extern int allow_data_exec, allow_stack_exec;
341
342 int
343 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
344 {
345 int current_abi;
346
347 /*
348 * Determine if the app is running in 32 or 64 bit mode.
349 */
350
351 if (vm_map_is_64bit(map))
352 current_abi = VM_ABI_64;
353 else
354 current_abi = VM_ABI_32;
355
356 /*
357 * Determine if we should allow the execution based on whether it's a
358 * stack or data area and the current architecture.
359 */
360
361 if (user_tag == VM_MEMORY_STACK)
362 return allow_stack_exec & current_abi;
363
364 return allow_data_exec & current_abi;
365 }
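
/*
 * Illustrative sketch (editor's addition): a fault handler could consult
 * override_nx() when an execute fault hits a page whose protection lacks
 * VM_PROT_EXECUTE.  "fault_type" and "entry" below are hypothetical locals,
 * not the actual vm_fault code path:
 *
 *	if ((fault_type & VM_PROT_EXECUTE) &&
 *	    !(entry->protection & VM_PROT_EXECUTE) &&
 *	    !override_nx(map, entry->alias)) {
 *		return KERN_PROTECTION_FAILURE;		deny execution
 *	}
 */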
366
367
368 /*
369 * Virtual memory maps provide for the mapping, protection,
370 * and sharing of virtual memory objects. In addition,
371 * this module provides for an efficient virtual copy of
372 * memory from one map to another.
373 *
374 * Synchronization is required prior to most operations.
375 *
376 * Maps consist of an ordered doubly-linked list of simple
377 * entries; a single hint is used to speed up lookups.
378 *
379 * Sharing maps have been deleted from this version of Mach.
380 * All shared objects are now mapped directly into the respective
381 * maps. This requires a change in the copy on write strategy;
382 * the asymmetric (delayed) strategy is used for shared temporary
383 * objects instead of the symmetric (shadow) strategy. All maps
384 * are now "top level" maps (either task map, kernel map or submap
385 * of the kernel map).
386 *
387  *	Since portions of maps are specified by start/end addresses,
388 * which may not align with existing map entries, all
389 * routines merely "clip" entries to these start/end values.
390 * [That is, an entry is split into two, bordering at a
391 * start or end value.] Note that these clippings may not
392 * always be necessary (as the two resulting entries are then
393 * not changed); however, the clipping is done for convenience.
394 * No attempt is currently made to "glue back together" two
395 * abutting entries.
396 *
397 * The symmetric (shadow) copy strategy implements virtual copy
398 * by copying VM object references from one map to
399 * another, and then marking both regions as copy-on-write.
400 * It is important to note that only one writeable reference
401 * to a VM object region exists in any map when this strategy
402 * is used -- this means that shadow object creation can be
403  *	delayed until a write operation occurs.  The asymmetric (delayed)
404 * strategy allows multiple maps to have writeable references to
405 * the same region of a vm object, and hence cannot delay creating
406 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
407 * Copying of permanent objects is completely different; see
408 * vm_object_copy_strategically() in vm_object.c.
409 */
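
/*
 *	Illustrative sketch (editor's addition): "clipping" as described above.
 *	Given an entry covering [0x1000, 0x4000) and an operation on
 *	[0x2000, 0x3000), the clip routines split the entry so the operation
 *	can work on whole entries:
 *
 *		before:	[0x1000 ---------------------------------- 0x4000)
 *		after:	[0x1000 - 0x2000)[0x2000 - 0x3000)[0x3000 - 0x4000)
 *
 *	The middle entry now exactly spans the requested range; the two
 *	neighbors are left as-is and are not glued back together afterwards.
 */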
410
411 static zone_t vm_map_zone; /* zone for vm_map structures */
412 static zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
413 static zone_t vm_map_kentry_zone; /* zone for kernel entry structures */
414 static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
415
416
417 /*
418 * Placeholder object for submap operations. This object is dropped
419 * into the range by a call to vm_map_find, and removed when
420 * vm_map_submap creates the submap.
421 */
422
423 vm_object_t vm_submap_object;
424
425 static void *map_data;
426 static vm_size_t map_data_size;
427 static void *kentry_data;
428 static vm_size_t kentry_data_size;
429 static int kentry_count = 2048; /* to init kentry_data_size */
430
431 #define NO_COALESCE_LIMIT ((1024 * 128) - 1)
432
433
434 /* Skip acquiring locks if we're in the midst of a kernel core dump */
435 unsigned int not_in_kdp = 1;
436
437 #if CONFIG_CODE_DECRYPTION
438 /*
439 * vm_map_apple_protected:
440 * This remaps the requested part of the object with an object backed by
441 * the decrypting pager.
442 * crypt_info contains entry points and session data for the crypt module.
443 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
444 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
445 */
446 kern_return_t
447 vm_map_apple_protected(
448 vm_map_t map,
449 vm_map_offset_t start,
450 vm_map_offset_t end,
451 struct pager_crypt_info *crypt_info)
452 {
453 boolean_t map_locked;
454 kern_return_t kr;
455 vm_map_entry_t map_entry;
456 memory_object_t protected_mem_obj;
457 vm_object_t protected_object;
458 vm_map_offset_t map_addr;
459
460 vm_map_lock_read(map);
461 map_locked = TRUE;
462
463 /* lookup the protected VM object */
464 if (!vm_map_lookup_entry(map,
465 start,
466 &map_entry) ||
467 map_entry->vme_end < end ||
468 map_entry->is_sub_map) {
469 /* that memory is not properly mapped */
470 kr = KERN_INVALID_ARGUMENT;
471 goto done;
472 }
473 protected_object = map_entry->object.vm_object;
474 if (protected_object == VM_OBJECT_NULL) {
475 /* there should be a VM object here at this point */
476 kr = KERN_INVALID_ARGUMENT;
477 goto done;
478 }
479
480 /* make sure protected object stays alive while map is unlocked */
481 vm_object_reference(protected_object);
482
483 vm_map_unlock_read(map);
484 map_locked = FALSE;
485
486 /*
487 * Lookup (and create if necessary) the protected memory object
488 * matching that VM object.
489 * If successful, this also grabs a reference on the memory object,
490 * to guarantee that it doesn't go away before we get a chance to map
491 * it.
492 */
493 protected_mem_obj = apple_protect_pager_setup(protected_object, crypt_info);
494
495 /* release extra ref on protected object */
496 vm_object_deallocate(protected_object);
497
498 if (protected_mem_obj == NULL) {
499 kr = KERN_FAILURE;
500 goto done;
501 }
502
503 /* map this memory object in place of the current one */
504 map_addr = start;
505 kr = vm_map_enter_mem_object(map,
506 &map_addr,
507 end - start,
508 (mach_vm_offset_t) 0,
509 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
510 (ipc_port_t) protected_mem_obj,
511 (map_entry->offset +
512 (start - map_entry->vme_start)),
513 TRUE,
514 map_entry->protection,
515 map_entry->max_protection,
516 map_entry->inheritance);
517 assert(map_addr == start);
518 /*
519 * Release the reference obtained by apple_protect_pager_setup().
520 * The mapping (if it succeeded) is now holding a reference on the
521 * memory object.
522 */
523 memory_object_deallocate(protected_mem_obj);
524
525 done:
526 if (map_locked) {
527 vm_map_unlock_read(map);
528 }
529 return kr;
530 }
531 #endif /* CONFIG_CODE_DECRYPTION */
532
533
534 lck_grp_t vm_map_lck_grp;
535 lck_grp_attr_t vm_map_lck_grp_attr;
536 lck_attr_t vm_map_lck_attr;
537
538
539 /*
540 * vm_map_init:
541 *
542 * Initialize the vm_map module. Must be called before
543 * any other vm_map routines.
544 *
545 * Map and entry structures are allocated from zones -- we must
546 * initialize those zones.
547 *
548 * There are three zones of interest:
549 *
550 * vm_map_zone: used to allocate maps.
551 * vm_map_entry_zone: used to allocate map entries.
552 * vm_map_kentry_zone: used to allocate map entries for the kernel.
553 *
554 * The kernel allocates map entries from a special zone that is initially
555 * "crammed" with memory. It would be difficult (perhaps impossible) for
556  *	the kernel to allocate more memory to an entry zone when it became
557 * empty since the very act of allocating memory implies the creation
558 * of a new entry.
559 */
560 void
561 vm_map_init(
562 void)
563 {
564 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
565 PAGE_SIZE, "maps");
566
567 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
568 1024*1024, PAGE_SIZE*5,
569 "non-kernel map entries");
570
571 vm_map_kentry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
572 kentry_data_size, kentry_data_size,
573 "kernel map entries");
574
575 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
576 16*1024, PAGE_SIZE, "map copies");
577
578 /*
579 * Cram the map and kentry zones with initial data.
580 * Set kentry_zone non-collectible to aid zone_gc().
581 */
582 zone_change(vm_map_zone, Z_COLLECT, FALSE);
583 zone_change(vm_map_kentry_zone, Z_COLLECT, FALSE);
584 zone_change(vm_map_kentry_zone, Z_EXPAND, FALSE);
585 zone_change(vm_map_kentry_zone, Z_FOREIGN, TRUE);
586 zcram(vm_map_zone, map_data, map_data_size);
587 zcram(vm_map_kentry_zone, kentry_data, kentry_data_size);
588
589 lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
590 lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
591 lck_attr_setdefault(&vm_map_lck_attr);
592 }
593
594 void
595 vm_map_steal_memory(
596 void)
597 {
598 map_data_size = round_page(10 * sizeof(struct _vm_map));
599 map_data = pmap_steal_memory(map_data_size);
600
601 #if 0
602 /*
603 * Limiting worst case: vm_map_kentry_zone needs to map each "available"
604 * physical page (i.e. that beyond the kernel image and page tables)
605 * individually; we guess at most one entry per eight pages in the
606 * real world. This works out to roughly .1 of 1% of physical memory,
607 * or roughly 1900 entries (64K) for a 64M machine with 4K pages.
608 */
609 #endif
610 kentry_count = pmap_free_pages() / 8;
611
612
613 kentry_data_size =
614 round_page(kentry_count * sizeof(struct vm_map_entry));
615 kentry_data = pmap_steal_memory(kentry_data_size);
616 }
617
618 /*
619 * vm_map_create:
620 *
621 * Creates and returns a new empty VM map with
622 * the given physical map structure, and having
623 * the given lower and upper address bounds.
624 */
625 vm_map_t
626 vm_map_create(
627 pmap_t pmap,
628 vm_map_offset_t min,
629 vm_map_offset_t max,
630 boolean_t pageable)
631 {
632 static int color_seed = 0;
633 register vm_map_t result;
634
635 result = (vm_map_t) zalloc(vm_map_zone);
636 if (result == VM_MAP_NULL)
637 panic("vm_map_create");
638
639 vm_map_first_entry(result) = vm_map_to_entry(result);
640 vm_map_last_entry(result) = vm_map_to_entry(result);
641 result->hdr.nentries = 0;
642 result->hdr.entries_pageable = pageable;
643
644 result->size = 0;
645 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
646 result->user_wire_size = 0;
647 result->ref_count = 1;
648 #if TASK_SWAPPER
649 result->res_count = 1;
650 result->sw_state = MAP_SW_IN;
651 #endif /* TASK_SWAPPER */
652 result->pmap = pmap;
653 result->min_offset = min;
654 result->max_offset = max;
655 result->wiring_required = FALSE;
656 result->no_zero_fill = FALSE;
657 result->mapped = FALSE;
658 result->wait_for_space = FALSE;
659 result->switch_protect = FALSE;
660 result->first_free = vm_map_to_entry(result);
661 result->hint = vm_map_to_entry(result);
662 result->color_rr = (color_seed++) & vm_color_mask;
663 vm_map_lock_init(result);
664 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
665
666 return(result);
667 }
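
/*
 * Illustrative sketch (editor's addition): a minimal use of vm_map_create(),
 * assuming a freshly created pmap and the generic user address bounds.
 * Real callers (e.g. task creation) choose their own pmap and bounds:
 *
 *	vm_map_t new_map;
 *
 *	new_map = vm_map_create(pmap_create(0, FALSE),
 *				MACH_VM_MIN_ADDRESS,
 *				MACH_VM_MAX_ADDRESS,
 *				TRUE);			pageable entries
 */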
668
669 /*
670 * vm_map_entry_create: [ internal use only ]
671 *
672 * Allocates a VM map entry for insertion in the
673 * given map (or map copy). No fields are filled.
674 */
675 #define vm_map_entry_create(map) \
676 _vm_map_entry_create(&(map)->hdr)
677
678 #define vm_map_copy_entry_create(copy) \
679 _vm_map_entry_create(&(copy)->cpy_hdr)
680
681 static vm_map_entry_t
682 _vm_map_entry_create(
683 register struct vm_map_header *map_header)
684 {
685 register zone_t zone;
686 register vm_map_entry_t entry;
687
688 if (map_header->entries_pageable)
689 zone = vm_map_entry_zone;
690 else
691 zone = vm_map_kentry_zone;
692
693 entry = (vm_map_entry_t) zalloc(zone);
694 if (entry == VM_MAP_ENTRY_NULL)
695 panic("vm_map_entry_create");
696
697 return(entry);
698 }
699
700 /*
701 * vm_map_entry_dispose: [ internal use only ]
702 *
703 * Inverse of vm_map_entry_create.
704 *
705 * write map lock held so no need to
706  *	do anything special to ensure correctness
707 * of the stores
708 */
709 #define vm_map_entry_dispose(map, entry) \
710 MACRO_BEGIN \
711 if((entry) == (map)->first_free) \
712 (map)->first_free = vm_map_to_entry(map); \
713 if((entry) == (map)->hint) \
714 (map)->hint = vm_map_to_entry(map); \
715 _vm_map_entry_dispose(&(map)->hdr, (entry)); \
716 MACRO_END
717
718 #define vm_map_copy_entry_dispose(copy, entry) \
719 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
720
721 static void
722 _vm_map_entry_dispose(
723 register struct vm_map_header *map_header,
724 register vm_map_entry_t entry)
725 {
726 register zone_t zone;
727
728 if (map_header->entries_pageable)
729 zone = vm_map_entry_zone;
730 else
731 zone = vm_map_kentry_zone;
732
733 zfree(zone, entry);
734 }
735
736 #if MACH_ASSERT
737 static boolean_t first_free_is_valid(vm_map_t map); /* forward */
738 static boolean_t first_free_check = FALSE;
739 static boolean_t
740 first_free_is_valid(
741 vm_map_t map)
742 {
743 vm_map_entry_t entry, next;
744
745 if (!first_free_check)
746 return TRUE;
747
748 entry = vm_map_to_entry(map);
749 next = entry->vme_next;
750 while (vm_map_trunc_page(next->vme_start) == vm_map_trunc_page(entry->vme_end) ||
751 (vm_map_trunc_page(next->vme_start) == vm_map_trunc_page(entry->vme_start) &&
752 next != vm_map_to_entry(map))) {
753 entry = next;
754 next = entry->vme_next;
755 if (entry == vm_map_to_entry(map))
756 break;
757 }
758 if (map->first_free != entry) {
759 printf("Bad first_free for map %p: %p should be %p\n",
760 map, map->first_free, entry);
761 return FALSE;
762 }
763 return TRUE;
764 }
765 #endif /* MACH_ASSERT */
766
767 /*
768 * UPDATE_FIRST_FREE:
769 *
770 * Updates the map->first_free pointer to the
771 * entry immediately before the first hole in the map.
772 * The map should be locked.
773 */
774 #define UPDATE_FIRST_FREE(map, new_first_free) \
775 MACRO_BEGIN \
776 vm_map_t UFF_map; \
777 vm_map_entry_t UFF_first_free; \
778 vm_map_entry_t UFF_next_entry; \
779 UFF_map = (map); \
780 UFF_first_free = (new_first_free); \
781 UFF_next_entry = UFF_first_free->vme_next; \
782 while (vm_map_trunc_page(UFF_next_entry->vme_start) == \
783 vm_map_trunc_page(UFF_first_free->vme_end) || \
784 (vm_map_trunc_page(UFF_next_entry->vme_start) == \
785 vm_map_trunc_page(UFF_first_free->vme_start) && \
786 UFF_next_entry != vm_map_to_entry(UFF_map))) { \
787 UFF_first_free = UFF_next_entry; \
788 UFF_next_entry = UFF_first_free->vme_next; \
789 if (UFF_first_free == vm_map_to_entry(UFF_map)) \
790 break; \
791 } \
792 UFF_map->first_free = UFF_first_free; \
793 assert(first_free_is_valid(UFF_map)); \
794 MACRO_END
795
796 /*
797 * vm_map_entry_{un,}link:
798 *
799 * Insert/remove entries from maps (or map copies).
800 */
801 #define vm_map_entry_link(map, after_where, entry) \
802 MACRO_BEGIN \
803 vm_map_t VMEL_map; \
804 vm_map_entry_t VMEL_entry; \
805 VMEL_map = (map); \
806 VMEL_entry = (entry); \
807 _vm_map_entry_link(&VMEL_map->hdr, after_where, VMEL_entry); \
808 UPDATE_FIRST_FREE(VMEL_map, VMEL_map->first_free); \
809 MACRO_END
810
811
812 #define vm_map_copy_entry_link(copy, after_where, entry) \
813 _vm_map_entry_link(&(copy)->cpy_hdr, after_where, (entry))
814
815 #define _vm_map_entry_link(hdr, after_where, entry) \
816 MACRO_BEGIN \
817 (hdr)->nentries++; \
818 (entry)->vme_prev = (after_where); \
819 (entry)->vme_next = (after_where)->vme_next; \
820 (entry)->vme_prev->vme_next = (entry)->vme_next->vme_prev = (entry); \
821 MACRO_END
822
823 #define vm_map_entry_unlink(map, entry) \
824 MACRO_BEGIN \
825 vm_map_t VMEU_map; \
826 vm_map_entry_t VMEU_entry; \
827 vm_map_entry_t VMEU_first_free; \
828 VMEU_map = (map); \
829 VMEU_entry = (entry); \
830 if (VMEU_entry->vme_start <= VMEU_map->first_free->vme_start) \
831 VMEU_first_free = VMEU_entry->vme_prev; \
832 else \
833 VMEU_first_free = VMEU_map->first_free; \
834 _vm_map_entry_unlink(&VMEU_map->hdr, VMEU_entry); \
835 UPDATE_FIRST_FREE(VMEU_map, VMEU_first_free); \
836 MACRO_END
837
838 #define vm_map_copy_entry_unlink(copy, entry) \
839 _vm_map_entry_unlink(&(copy)->cpy_hdr, (entry))
840
841 #define _vm_map_entry_unlink(hdr, entry) \
842 MACRO_BEGIN \
843 (hdr)->nentries--; \
844 (entry)->vme_next->vme_prev = (entry)->vme_prev; \
845 (entry)->vme_prev->vme_next = (entry)->vme_next; \
846 MACRO_END
847
848 #if MACH_ASSERT && TASK_SWAPPER
849 /*
850 * vm_map_res_reference:
851 *
852 * Adds another valid residence count to the given map.
853 *
854 * Map is locked so this function can be called from
855 * vm_map_swapin.
856 *
857 */
858 void vm_map_res_reference(register vm_map_t map)
859 {
860 /* assert map is locked */
861 assert(map->res_count >= 0);
862 assert(map->ref_count >= map->res_count);
863 if (map->res_count == 0) {
864 lck_mtx_unlock(&map->s_lock);
865 vm_map_lock(map);
866 vm_map_swapin(map);
867 lck_mtx_lock(&map->s_lock);
868 ++map->res_count;
869 vm_map_unlock(map);
870 } else
871 ++map->res_count;
872 }
873
874 /*
875 * vm_map_reference_swap:
876 *
877 * Adds valid reference and residence counts to the given map.
878 *
879 * The map may not be in memory (i.e. zero residence count).
880 *
881 */
882 void vm_map_reference_swap(register vm_map_t map)
883 {
884 assert(map != VM_MAP_NULL);
885 lck_mtx_lock(&map->s_lock);
886 assert(map->res_count >= 0);
887 assert(map->ref_count >= map->res_count);
888 map->ref_count++;
889 vm_map_res_reference(map);
890 lck_mtx_unlock(&map->s_lock);
891 }
892
893 /*
894 * vm_map_res_deallocate:
895 *
896 * Decrement residence count on a map; possibly causing swapout.
897 *
898 * The map must be in memory (i.e. non-zero residence count).
899 *
900 * The map is locked, so this function is callable from vm_map_deallocate.
901 *
902 */
903 void vm_map_res_deallocate(register vm_map_t map)
904 {
905 assert(map->res_count > 0);
906 if (--map->res_count == 0) {
907 lck_mtx_unlock(&map->s_lock);
908 vm_map_lock(map);
909 vm_map_swapout(map);
910 vm_map_unlock(map);
911 lck_mtx_lock(&map->s_lock);
912 }
913 assert(map->ref_count >= map->res_count);
914 }
915 #endif /* MACH_ASSERT && TASK_SWAPPER */
916
917 /*
918 * vm_map_destroy:
919 *
920 * Actually destroy a map.
921 */
922 void
923 vm_map_destroy(
924 vm_map_t map,
925 int flags)
926 {
927 vm_map_lock(map);
928
929 /* clean up regular map entries */
930 (void) vm_map_delete(map, map->min_offset, map->max_offset,
931 flags, VM_MAP_NULL);
932 /* clean up leftover special mappings (commpage, etc...) */
933 #ifdef __ppc__
934 /*
935 * PPC51: ppc64 is limited to 51-bit addresses.
936 * Memory beyond this 51-bit limit is mapped specially at the
937 * pmap level, so do not interfere.
938 * On PPC64, the commpage is mapped beyond the addressable range
939 * via a special pmap hack, so ask pmap to clean it explicitly...
940 */
941 if (map->pmap) {
942 pmap_unmap_sharedpage(map->pmap);
943 }
944 /* ... and do not let regular pmap cleanup apply here */
945 flags |= VM_MAP_REMOVE_NO_PMAP_CLEANUP;
946 #endif /* __ppc__ */
947 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
948 flags, VM_MAP_NULL);
949 vm_map_unlock(map);
950
951 assert(map->hdr.nentries == 0);
952
953 if(map->pmap)
954 pmap_destroy(map->pmap);
955
956 zfree(vm_map_zone, map);
957 }
958
959 #if TASK_SWAPPER
960 /*
961 * vm_map_swapin/vm_map_swapout
962 *
963 * Swap a map in and out, either referencing or releasing its resources.
964 * These functions are internal use only; however, they must be exported
965 * because they may be called from macros, which are exported.
966 *
967 * In the case of swapout, there could be races on the residence count,
968 * so if the residence count is up, we return, assuming that a
969 * vm_map_deallocate() call in the near future will bring us back.
970 *
971 * Locking:
972 * -- We use the map write lock for synchronization among races.
973 * -- The map write lock, and not the simple s_lock, protects the
974 * swap state of the map.
975 * -- If a map entry is a share map, then we hold both locks, in
976 * hierarchical order.
977 *
978 * Synchronization Notes:
979 * 1) If a vm_map_swapin() call happens while swapout in progress, it
980 * will block on the map lock and proceed when swapout is through.
981 * 2) A vm_map_reference() call at this time is illegal, and will
982 * cause a panic. vm_map_reference() is only allowed on resident
983 * maps, since it refuses to block.
984 * 3) A vm_map_swapin() call during a swapin will block, and
985  *	proceed when the first swapin is done, turning into a nop.
986 * This is the reason the res_count is not incremented until
987 * after the swapin is complete.
988 * 4) There is a timing hole after the checks of the res_count, before
989 * the map lock is taken, during which a swapin may get the lock
990 * before a swapout about to happen. If this happens, the swapin
991 * will detect the state and increment the reference count, causing
992 * the swapout to be a nop, thereby delaying it until a later
993 * vm_map_deallocate. If the swapout gets the lock first, then
994 * the swapin will simply block until the swapout is done, and
995 * then proceed.
996 *
997 * Because vm_map_swapin() is potentially an expensive operation, it
998 * should be used with caution.
999 *
1000 * Invariants:
1001 * 1) A map with a residence count of zero is either swapped, or
1002 * being swapped.
1003 * 2) A map with a non-zero residence count is either resident,
1004 * or being swapped in.
1005 */
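
/*
 * Illustrative sketch (editor's addition): the reference/residence pairing
 * described above, for a hypothetical caller that needs a map which may
 * currently be swapped out:
 *
 *	vm_map_reference_swap(map);	takes both ref_count and res_count,
 *					swapping the map in if necessary
 *	... use the map ...
 *	vm_map_deallocate(map);		drops the reference; under
 *					TASK_SWAPPER it also drops the
 *					residence count, which may trigger
 *					vm_map_swapout()
 */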
1006
1007 int vm_map_swap_enable = 1;
1008
1009 void vm_map_swapin (vm_map_t map)
1010 {
1011 register vm_map_entry_t entry;
1012
1013 if (!vm_map_swap_enable) /* debug */
1014 return;
1015
1016 /*
1017 * Map is locked
1018 * First deal with various races.
1019 */
1020 if (map->sw_state == MAP_SW_IN)
1021 /*
1022 * we raced with swapout and won. Returning will incr.
1023 * the res_count, turning the swapout into a nop.
1024 */
1025 return;
1026
1027 /*
1028 * The residence count must be zero. If we raced with another
1029 * swapin, the state would have been IN; if we raced with a
1030 * swapout (after another competing swapin), we must have lost
1031 * the race to get here (see above comment), in which case
1032 * res_count is still 0.
1033 */
1034 assert(map->res_count == 0);
1035
1036 /*
1037 * There are no intermediate states of a map going out or
1038 * coming in, since the map is locked during the transition.
1039 */
1040 assert(map->sw_state == MAP_SW_OUT);
1041
1042 /*
1043 * We now operate upon each map entry. If the entry is a sub-
1044 * or share-map, we call vm_map_res_reference upon it.
1045 * If the entry is an object, we call vm_object_res_reference
1046 * (this may iterate through the shadow chain).
1047 * Note that we hold the map locked the entire time,
1048 * even if we get back here via a recursive call in
1049 * vm_map_res_reference.
1050 */
1051 entry = vm_map_first_entry(map);
1052
1053 while (entry != vm_map_to_entry(map)) {
1054 if (entry->object.vm_object != VM_OBJECT_NULL) {
1055 if (entry->is_sub_map) {
1056 vm_map_t lmap = entry->object.sub_map;
1057 lck_mtx_lock(&lmap->s_lock);
1058 vm_map_res_reference(lmap);
1059 lck_mtx_unlock(&lmap->s_lock);
1060 } else {
1061 vm_object_t object = entry->object.vm_object;
1062 vm_object_lock(object);
1063 /*
1064 * This call may iterate through the
1065 * shadow chain.
1066 */
1067 vm_object_res_reference(object);
1068 vm_object_unlock(object);
1069 }
1070 }
1071 entry = entry->vme_next;
1072 }
1073 assert(map->sw_state == MAP_SW_OUT);
1074 map->sw_state = MAP_SW_IN;
1075 }
1076
1077 void vm_map_swapout(vm_map_t map)
1078 {
1079 register vm_map_entry_t entry;
1080
1081 /*
1082 * Map is locked
1083 * First deal with various races.
1084 * If we raced with a swapin and lost, the residence count
1085 * will have been incremented to 1, and we simply return.
1086 */
1087 lck_mtx_lock(&map->s_lock);
1088 if (map->res_count != 0) {
1089 lck_mtx_unlock(&map->s_lock);
1090 return;
1091 }
1092 lck_mtx_unlock(&map->s_lock);
1093
1094 /*
1095 * There are no intermediate states of a map going out or
1096 * coming in, since the map is locked during the transition.
1097 */
1098 assert(map->sw_state == MAP_SW_IN);
1099
1100 if (!vm_map_swap_enable)
1101 return;
1102
1103 /*
1104 * We now operate upon each map entry. If the entry is a sub-
1105 * or share-map, we call vm_map_res_deallocate upon it.
1106 * If the entry is an object, we call vm_object_res_deallocate
1107 * (this may iterate through the shadow chain).
1108 * Note that we hold the map locked the entire time,
1109 * even if we get back here via a recursive call in
1110 * vm_map_res_deallocate.
1111 */
1112 entry = vm_map_first_entry(map);
1113
1114 while (entry != vm_map_to_entry(map)) {
1115 if (entry->object.vm_object != VM_OBJECT_NULL) {
1116 if (entry->is_sub_map) {
1117 vm_map_t lmap = entry->object.sub_map;
1118 lck_mtx_lock(&lmap->s_lock);
1119 vm_map_res_deallocate(lmap);
1120 lck_mtx_unlock(&lmap->s_lock);
1121 } else {
1122 vm_object_t object = entry->object.vm_object;
1123 vm_object_lock(object);
1124 /*
1125 * This call may take a long time,
1126 * since it could actively push
1127 * out pages (if we implement it
1128 * that way).
1129 */
1130 vm_object_res_deallocate(object);
1131 vm_object_unlock(object);
1132 }
1133 }
1134 entry = entry->vme_next;
1135 }
1136 assert(map->sw_state == MAP_SW_IN);
1137 map->sw_state = MAP_SW_OUT;
1138 }
1139
1140 #endif /* TASK_SWAPPER */
1141
1142
1143 /*
1144 * SAVE_HINT_MAP_READ:
1145 *
1146 * Saves the specified entry as the hint for
1147  *	future lookups.  Only a read lock is held on the map,
1148 * so make sure the store is atomic... OSCompareAndSwap
1149 * guarantees this... also, we don't care if we collide
1150 * and someone else wins and stores their 'hint'
1151 */
1152 #define SAVE_HINT_MAP_READ(map,value) \
1153 MACRO_BEGIN \
1154 OSCompareAndSwapPtr((map)->hint, value, &(map)->hint); \
1155 MACRO_END
1156
1157
1158 /*
1159 * SAVE_HINT_MAP_WRITE:
1160 *
1161 * Saves the specified entry as the hint for
1162  *	future lookups.  The write lock is held on the map,
1163 * so no one else can be writing or looking
1164 * until the lock is dropped, so it's safe
1165 * to just do an assignment
1166 */
1167 #define SAVE_HINT_MAP_WRITE(map,value) \
1168 MACRO_BEGIN \
1169 (map)->hint = (value); \
1170 MACRO_END
1171
1172 /*
1173 * vm_map_lookup_entry: [ internal use only ]
1174 *
1175 * Finds the map entry containing (or
1176 * immediately preceding) the specified address
1177 * in the given map; the entry is returned
1178 * in the "entry" parameter. The boolean
1179 * result indicates whether the address is
1180 * actually contained in the map.
1181 */
1182 boolean_t
1183 vm_map_lookup_entry(
1184 register vm_map_t map,
1185 register vm_map_offset_t address,
1186 vm_map_entry_t *entry) /* OUT */
1187 {
1188 register vm_map_entry_t cur;
1189 register vm_map_entry_t last;
1190
1191 /*
1192 * Start looking either from the head of the
1193 * list, or from the hint.
1194 */
1195 cur = map->hint;
1196
1197 if (cur == vm_map_to_entry(map))
1198 cur = cur->vme_next;
1199
1200 if (address >= cur->vme_start) {
1201 /*
1202 * Go from hint to end of list.
1203 *
1204 * But first, make a quick check to see if
1205 * we are already looking at the entry we
1206 * want (which is usually the case).
1207 * Note also that we don't need to save the hint
1208 * here... it is the same hint (unless we are
1209 * at the header, in which case the hint didn't
1210 * buy us anything anyway).
1211 */
1212 last = vm_map_to_entry(map);
1213 if ((cur != last) && (cur->vme_end > address)) {
1214 *entry = cur;
1215 return(TRUE);
1216 }
1217 }
1218 else {
1219 /*
1220 * Go from start to hint, *inclusively*
1221 */
1222 last = cur->vme_next;
1223 cur = vm_map_first_entry(map);
1224 }
1225
1226 /*
1227 * Search linearly
1228 */
1229
1230 while (cur != last) {
1231 if (cur->vme_end > address) {
1232 if (address >= cur->vme_start) {
1233 /*
1234 * Save this lookup for future
1235 * hints, and return
1236 */
1237
1238 *entry = cur;
1239 SAVE_HINT_MAP_READ(map, cur);
1240
1241 return(TRUE);
1242 }
1243 break;
1244 }
1245 cur = cur->vme_next;
1246 }
1247 *entry = cur->vme_prev;
1248 SAVE_HINT_MAP_READ(map, *entry);
1249
1250 return(FALSE);
1251 }
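
/*
 * Illustrative sketch (editor's addition): typical use of
 * vm_map_lookup_entry() with the map lock held ("addr" and "entry" are
 * hypothetical locals):
 *
 *	vm_map_entry_t	entry;
 *
 *	if (vm_map_lookup_entry(map, addr, &entry)) {
 *		addr lies within [entry->vme_start, entry->vme_end)
 *	} else {
 *		"entry" precedes addr (possibly the map header); a new
 *		entry covering addr would be linked right after it
 *	}
 */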
1252
1253 /*
1254 * Routine: vm_map_find_space
1255 * Purpose:
1256 * Allocate a range in the specified virtual address map,
1257 * returning the entry allocated for that range.
1258 * Used by kmem_alloc, etc.
1259 *
1260  *	The map must NOT be locked. It will be returned locked
1261 * on KERN_SUCCESS, unlocked on failure.
1262 *
1263 * If an entry is allocated, the object/offset fields
1264 * are initialized to zero.
1265 */
1266 kern_return_t
1267 vm_map_find_space(
1268 register vm_map_t map,
1269 vm_map_offset_t *address, /* OUT */
1270 vm_map_size_t size,
1271 vm_map_offset_t mask,
1272 int flags,
1273 vm_map_entry_t *o_entry) /* OUT */
1274 {
1275 register vm_map_entry_t entry, new_entry;
1276 register vm_map_offset_t start;
1277 register vm_map_offset_t end;
1278
1279 if (size == 0) {
1280 *address = 0;
1281 return KERN_INVALID_ARGUMENT;
1282 }
1283
1284 if (flags & VM_FLAGS_GUARD_AFTER) {
1285 /* account for the back guard page in the size */
1286 size += PAGE_SIZE_64;
1287 }
1288
1289 new_entry = vm_map_entry_create(map);
1290
1291 /*
1292 * Look for the first possible address; if there's already
1293 * something at this address, we have to start after it.
1294 */
1295
1296 vm_map_lock(map);
1297
1298 assert(first_free_is_valid(map));
1299 if ((entry = map->first_free) == vm_map_to_entry(map))
1300 start = map->min_offset;
1301 else
1302 start = entry->vme_end;
1303
1304 /*
1305 * In any case, the "entry" always precedes
1306 * the proposed new region throughout the loop:
1307 */
1308
1309 while (TRUE) {
1310 register vm_map_entry_t next;
1311
1312 /*
1313 * Find the end of the proposed new region.
1314 * Be sure we didn't go beyond the end, or
1315 * wrap around the address.
1316 */
1317
1318 if (flags & VM_FLAGS_GUARD_BEFORE) {
1319 /* reserve space for the front guard page */
1320 start += PAGE_SIZE_64;
1321 }
1322 end = ((start + mask) & ~mask);
1323
1324 if (end < start) {
1325 vm_map_entry_dispose(map, new_entry);
1326 vm_map_unlock(map);
1327 return(KERN_NO_SPACE);
1328 }
1329 start = end;
1330 end += size;
1331
1332 if ((end > map->max_offset) || (end < start)) {
1333 vm_map_entry_dispose(map, new_entry);
1334 vm_map_unlock(map);
1335 return(KERN_NO_SPACE);
1336 }
1337
1338 /*
1339 * If there are no more entries, we must win.
1340 */
1341
1342 next = entry->vme_next;
1343 if (next == vm_map_to_entry(map))
1344 break;
1345
1346 /*
1347 * If there is another entry, it must be
1348 * after the end of the potential new region.
1349 */
1350
1351 if (next->vme_start >= end)
1352 break;
1353
1354 /*
1355 * Didn't fit -- move to the next entry.
1356 */
1357
1358 entry = next;
1359 start = entry->vme_end;
1360 }
1361
1362 /*
1363 * At this point,
1364 * "start" and "end" should define the endpoints of the
1365 * available new range, and
1366 * "entry" should refer to the region before the new
1367 * range, and
1368 *
1369 * the map should be locked.
1370 */
1371
1372 if (flags & VM_FLAGS_GUARD_BEFORE) {
1373 /* go back for the front guard page */
1374 start -= PAGE_SIZE_64;
1375 }
1376 *address = start;
1377
1378 new_entry->vme_start = start;
1379 new_entry->vme_end = end;
1380 assert(page_aligned(new_entry->vme_start));
1381 assert(page_aligned(new_entry->vme_end));
1382
1383 new_entry->is_shared = FALSE;
1384 new_entry->is_sub_map = FALSE;
1385 new_entry->use_pmap = FALSE;
1386 new_entry->object.vm_object = VM_OBJECT_NULL;
1387 new_entry->offset = (vm_object_offset_t) 0;
1388
1389 new_entry->needs_copy = FALSE;
1390
1391 new_entry->inheritance = VM_INHERIT_DEFAULT;
1392 new_entry->protection = VM_PROT_DEFAULT;
1393 new_entry->max_protection = VM_PROT_ALL;
1394 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1395 new_entry->wired_count = 0;
1396 new_entry->user_wired_count = 0;
1397
1398 new_entry->in_transition = FALSE;
1399 new_entry->needs_wakeup = FALSE;
1400 new_entry->no_cache = FALSE;
1401 new_entry->permanent = FALSE;
1402 new_entry->superpage_size = 0;
1403
1404 new_entry->alias = 0;
1405 new_entry->zero_wired_pages = FALSE;
1406
1407 VM_GET_FLAGS_ALIAS(flags, new_entry->alias);
1408
1409 /*
1410 * Insert the new entry into the list
1411 */
1412
1413 vm_map_entry_link(map, entry, new_entry);
1414
1415 map->size += size;
1416
1417 /*
1418 * Update the lookup hint
1419 */
1420 SAVE_HINT_MAP_WRITE(map, new_entry);
1421
1422 *o_entry = new_entry;
1423 return(KERN_SUCCESS);
1424 }
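
/*
 * Illustrative sketch (editor's addition): the kmem_alloc-style calling
 * pattern, assuming hypothetical locals "size" and "object" to back the
 * new range.  Remember that the map comes back locked on KERN_SUCCESS:
 *
 *	vm_map_offset_t	addr;
 *	vm_map_entry_t	entry;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_find_space(kernel_map, &addr, size, 0, 0, &entry);
 *	if (kr == KERN_SUCCESS) {
 *		entry->object.vm_object = object;	set backing object
 *		entry->offset = 0;			while still locked
 *		vm_map_unlock(kernel_map);
 *	}
 */
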
1425
1426 int vm_map_pmap_enter_print = FALSE;
1427 int vm_map_pmap_enter_enable = FALSE;
1428
1429 /*
1430 * Routine: vm_map_pmap_enter [internal only]
1431 *
1432 * Description:
1433 * Force pages from the specified object to be entered into
1434 * the pmap at the specified address if they are present.
1435  *	As soon as a page is not found in the object, the scan ends.
1436 *
1437 * Returns:
1438 * Nothing.
1439 *
1440 * In/out conditions:
1441 * The source map should not be locked on entry.
1442 */
1443 static void
1444 vm_map_pmap_enter(
1445 vm_map_t map,
1446 register vm_map_offset_t addr,
1447 register vm_map_offset_t end_addr,
1448 register vm_object_t object,
1449 vm_object_offset_t offset,
1450 vm_prot_t protection)
1451 {
1452 int type_of_fault;
1453 kern_return_t kr;
1454
1455 if(map->pmap == 0)
1456 return;
1457
1458 while (addr < end_addr) {
1459 register vm_page_t m;
1460
1461 vm_object_lock(object);
1462
1463 m = vm_page_lookup(object, offset);
1464 /*
1465 * ENCRYPTED SWAP:
1466 * The user should never see encrypted data, so do not
1467 * enter an encrypted page in the page table.
1468 */
1469 if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
1470 m->fictitious ||
1471 (m->unusual && ( m->error || m->restart || m->absent))) {
1472 vm_object_unlock(object);
1473 return;
1474 }
1475
1476 if (vm_map_pmap_enter_print) {
1477 printf("vm_map_pmap_enter:");
1478 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1479 map, (unsigned long long)addr, object, (unsigned long long)offset);
1480 }
1481 type_of_fault = DBG_CACHE_HIT_FAULT;
1482 kr = vm_fault_enter(m, map->pmap, addr, protection,
1483 VM_PAGE_WIRED(m), FALSE, FALSE,
1484 &type_of_fault);
1485
1486 vm_object_unlock(object);
1487
1488 offset += PAGE_SIZE_64;
1489 addr += PAGE_SIZE;
1490 }
1491 }
1492
1493 boolean_t vm_map_pmap_is_empty(
1494 vm_map_t map,
1495 vm_map_offset_t start,
1496 vm_map_offset_t end);
1497 boolean_t vm_map_pmap_is_empty(
1498 vm_map_t map,
1499 vm_map_offset_t start,
1500 vm_map_offset_t end)
1501 {
1502 #ifdef MACHINE_PMAP_IS_EMPTY
1503 return pmap_is_empty(map->pmap, start, end);
1504 #else /* MACHINE_PMAP_IS_EMPTY */
1505 vm_map_offset_t offset;
1506 ppnum_t phys_page;
1507
1508 if (map->pmap == NULL) {
1509 return TRUE;
1510 }
1511
1512 for (offset = start;
1513 offset < end;
1514 offset += PAGE_SIZE) {
1515 phys_page = pmap_find_phys(map->pmap, offset);
1516 if (phys_page) {
1517 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1518 "page %d at 0x%llx\n",
1519 map, (long long)start, (long long)end,
1520 phys_page, (long long)offset);
1521 return FALSE;
1522 }
1523 }
1524 return TRUE;
1525 #endif /* MACHINE_PMAP_IS_EMPTY */
1526 }
1527
1528 /*
1529 * Routine: vm_map_enter
1530 *
1531 * Description:
1532 * Allocate a range in the specified virtual address map.
1533 * The resulting range will refer to memory defined by
1534 * the given memory object and offset into that object.
1535 *
1536 * Arguments are as defined in the vm_map call.
1537 */
1538 int _map_enter_debug = 0;
1539 static unsigned int vm_map_enter_restore_successes = 0;
1540 static unsigned int vm_map_enter_restore_failures = 0;
1541 kern_return_t
1542 vm_map_enter(
1543 vm_map_t map,
1544 vm_map_offset_t *address, /* IN/OUT */
1545 vm_map_size_t size,
1546 vm_map_offset_t mask,
1547 int flags,
1548 vm_object_t object,
1549 vm_object_offset_t offset,
1550 boolean_t needs_copy,
1551 vm_prot_t cur_protection,
1552 vm_prot_t max_protection,
1553 vm_inherit_t inheritance)
1554 {
1555 vm_map_entry_t entry, new_entry;
1556 vm_map_offset_t start, tmp_start, tmp_offset;
1557 vm_map_offset_t end, tmp_end;
1558 vm_map_offset_t tmp2_start, tmp2_end;
1559 vm_map_offset_t step;
1560 kern_return_t result = KERN_SUCCESS;
1561 vm_map_t zap_old_map = VM_MAP_NULL;
1562 vm_map_t zap_new_map = VM_MAP_NULL;
1563 boolean_t map_locked = FALSE;
1564 boolean_t pmap_empty = TRUE;
1565 boolean_t new_mapping_established = FALSE;
1566 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1567 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1568 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
1569 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
1570 boolean_t is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
1571 boolean_t permanent = ((flags & VM_FLAGS_PERMANENT) != 0);
1572 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
1573 char alias;
1574 vm_map_offset_t effective_min_offset, effective_max_offset;
1575 kern_return_t kr;
1576
1577 if (superpage_size) {
1578 switch (superpage_size) {
1579 /*
1580 * Note that the current implementation only supports
1581 * a single size for superpages, SUPERPAGE_SIZE, per
1582  *	architecture. As soon as more sizes need
1583 * to be supported, SUPERPAGE_SIZE has to be replaced
1584 * with a lookup of the size depending on superpage_size.
1585 */
1586 #ifdef __x86_64__
1587 case SUPERPAGE_SIZE_2MB:
1588 break;
1589 #endif
1590 default:
1591 return KERN_INVALID_ARGUMENT;
1592 }
1593 mask = SUPERPAGE_SIZE-1;
1594 if (size & (SUPERPAGE_SIZE-1))
1595 return KERN_INVALID_ARGUMENT;
1596 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
1597 }
1598
1599 #if CONFIG_EMBEDDED
1600 if (cur_protection & VM_PROT_WRITE) {
1601 if (cur_protection & VM_PROT_EXECUTE) {
1602 printf("EMBEDDED: %s curprot cannot be write+execute. turning off execute\n", __PRETTY_FUNCTION__);
1603 cur_protection &= ~VM_PROT_EXECUTE;
1604 }
1605 }
1606 #endif /* CONFIG_EMBEDDED */
1607
1608 if (is_submap) {
1609 if (purgable) {
1610 /* submaps can not be purgeable */
1611 return KERN_INVALID_ARGUMENT;
1612 }
1613 if (object == VM_OBJECT_NULL) {
1614 /* submaps can not be created lazily */
1615 return KERN_INVALID_ARGUMENT;
1616 }
1617 }
1618 if (flags & VM_FLAGS_ALREADY) {
1619 /*
1620 * VM_FLAGS_ALREADY says that it's OK if the same mapping
1621  *	is already present. For it to be meaningful, the requested
1622 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
1623  *	we shouldn't try to remove what was mapped there first
1624 * (!VM_FLAGS_OVERWRITE).
1625 */
1626 if ((flags & VM_FLAGS_ANYWHERE) ||
1627 (flags & VM_FLAGS_OVERWRITE)) {
1628 return KERN_INVALID_ARGUMENT;
1629 }
1630 }
1631
1632 if (flags & VM_FLAGS_BELOW_MIN) {
1633 /*
1634 * Allow an insertion below the map's min offset.
1635 */
1636 effective_min_offset = 0ULL;
1637 } else {
1638 effective_min_offset = map->min_offset;
1639 }
1640
1641 if (flags & VM_FLAGS_BEYOND_MAX) {
1642 /*
1643 * Allow an insertion beyond the map's max offset.
1644 */
1645 if (vm_map_is_64bit(map))
1646 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
1647 else
1648 effective_max_offset = 0x00000000FFFFF000ULL;
1649 } else {
1650 effective_max_offset = map->max_offset;
1651 }
1652
1653 if (size == 0 ||
1654 (offset & PAGE_MASK_64) != 0) {
1655 *address = 0;
1656 return KERN_INVALID_ARGUMENT;
1657 }
1658
1659 VM_GET_FLAGS_ALIAS(flags, alias);
1660
1661 #define RETURN(value) { result = value; goto BailOut; }
1662
1663 assert(page_aligned(*address));
1664 assert(page_aligned(size));
1665
1666 /*
1667 * Only zero-fill objects are allowed to be purgable.
1668 * LP64todo - limit purgable objects to 32-bits for now
1669 */
1670 if (purgable &&
1671 (offset != 0 ||
1672 (object != VM_OBJECT_NULL &&
1673 (object->size != size ||
1674 object->purgable == VM_PURGABLE_DENY))
1675 || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
1676 return KERN_INVALID_ARGUMENT;
1677
1678 if (!anywhere && overwrite) {
1679 /*
1680 * Create a temporary VM map to hold the old mappings in the
1681 * affected area while we create the new one.
1682 * This avoids releasing the VM map lock in
1683 * vm_map_entry_delete() and allows atomicity
1684 * when we want to replace some mappings with a new one.
1685 * It also allows us to restore the old VM mappings if the
1686 * new mapping fails.
1687 */
1688 zap_old_map = vm_map_create(PMAP_NULL,
1689 *address,
1690 *address + size,
1691 map->hdr.entries_pageable);
1692 }
1693
1694 StartAgain: ;
1695
1696 start = *address;
1697
1698 if (anywhere) {
1699 vm_map_lock(map);
1700 map_locked = TRUE;
1701
1702 /*
1703 * Calculate the first possible address.
1704 */
1705
1706 if (start < effective_min_offset)
1707 start = effective_min_offset;
1708 if (start > effective_max_offset)
1709 RETURN(KERN_NO_SPACE);
1710
1711 /*
1712 * Look for the first possible address;
1713 * if there's already something at this
1714 * address, we have to start after it.
1715 */
1716
1717 assert(first_free_is_valid(map));
1718 if (start == effective_min_offset) {
1719 if ((entry = map->first_free) != vm_map_to_entry(map))
1720 start = entry->vme_end;
1721 } else {
1722 vm_map_entry_t tmp_entry;
1723 if (vm_map_lookup_entry(map, start, &tmp_entry))
1724 start = tmp_entry->vme_end;
1725 entry = tmp_entry;
1726 }
1727
1728 /*
1729 * In any case, the "entry" always precedes
1730 * the proposed new region throughout the
1731 * loop:
1732 */
1733
1734 while (TRUE) {
1735 register vm_map_entry_t next;
1736
1737 /*
1738 * Find the end of the proposed new region.
1739 * Be sure we didn't go beyond the end, or
1740 * wrap around the address.
1741 */
1742
1743 end = ((start + mask) & ~mask);
1744 if (end < start)
1745 RETURN(KERN_NO_SPACE);
1746 start = end;
1747 end += size;
1748
1749 if ((end > effective_max_offset) || (end < start)) {
1750 if (map->wait_for_space) {
1751 if (size <= (effective_max_offset -
1752 effective_min_offset)) {
1753 assert_wait((event_t)map,
1754 THREAD_ABORTSAFE);
1755 vm_map_unlock(map);
1756 map_locked = FALSE;
1757 thread_block(THREAD_CONTINUE_NULL);
1758 goto StartAgain;
1759 }
1760 }
1761 RETURN(KERN_NO_SPACE);
1762 }
1763
1764 /*
1765 * If there are no more entries, we must win.
1766 */
1767
1768 next = entry->vme_next;
1769 if (next == vm_map_to_entry(map))
1770 break;
1771
1772 /*
1773 * If there is another entry, it must be
1774 * after the end of the potential new region.
1775 */
1776
1777 if (next->vme_start >= end)
1778 break;
1779
1780 /*
1781 * Didn't fit -- move to the next entry.
1782 */
1783
1784 entry = next;
1785 start = entry->vme_end;
1786 }
1787 *address = start;
1788 } else {
1789 /*
1790 * Verify that:
1791 * the address doesn't itself violate
1792 * the mask requirement.
1793 */
1794
1795 vm_map_lock(map);
1796 map_locked = TRUE;
1797 if ((start & mask) != 0)
1798 RETURN(KERN_NO_SPACE);
1799
1800 /*
1801 * ... the address is within bounds
1802 */
1803
1804 end = start + size;
1805
1806 if ((start < effective_min_offset) ||
1807 (end > effective_max_offset) ||
1808 (start >= end)) {
1809 RETURN(KERN_INVALID_ADDRESS);
1810 }
1811
1812 if (overwrite && zap_old_map != VM_MAP_NULL) {
1813 /*
1814 * Fixed mapping and "overwrite" flag: attempt to
1815 * remove all existing mappings in the specified
1816 * address range, saving them in our "zap_old_map".
1817 */
1818 (void) vm_map_delete(map, start, end,
1819 VM_MAP_REMOVE_SAVE_ENTRIES,
1820 zap_old_map);
1821 }
1822
1823 /*
1824 * ... the starting address isn't allocated
1825 */
1826
1827 if (vm_map_lookup_entry(map, start, &entry)) {
1828 if (! (flags & VM_FLAGS_ALREADY)) {
1829 RETURN(KERN_NO_SPACE);
1830 }
1831 /*
1832 * Check if what's already there is what we want.
1833 */
1834 tmp_start = start;
1835 tmp_offset = offset;
1836 if (entry->vme_start < start) {
1837 tmp_start -= start - entry->vme_start;
1838 tmp_offset -= start - entry->vme_start;
1839
1840 }
1841 for (; entry->vme_start < end;
1842 entry = entry->vme_next) {
1843 /*
1844 * Check if the mapping's attributes
1845 * match the existing map entry.
1846 */
1847 if (entry == vm_map_to_entry(map) ||
1848 entry->vme_start != tmp_start ||
1849 entry->is_sub_map != is_submap ||
1850 entry->offset != tmp_offset ||
1851 entry->needs_copy != needs_copy ||
1852 entry->protection != cur_protection ||
1853 entry->max_protection != max_protection ||
1854 entry->inheritance != inheritance ||
1855 entry->alias != alias) {
1856 /* not the same mapping ! */
1857 RETURN(KERN_NO_SPACE);
1858 }
1859 /*
1860 * Check if the same object is being mapped.
1861 */
1862 if (is_submap) {
1863 if (entry->object.sub_map !=
1864 (vm_map_t) object) {
1865 /* not the same submap */
1866 RETURN(KERN_NO_SPACE);
1867 }
1868 } else {
1869 if (entry->object.vm_object != object) {
1870 /* not the same VM object... */
1871 vm_object_t obj2;
1872
1873 obj2 = entry->object.vm_object;
1874 if ((obj2 == VM_OBJECT_NULL ||
1875 obj2->internal) &&
1876 (object == VM_OBJECT_NULL ||
1877 object->internal)) {
1878 /*
1879 * ... but both are
1880 * anonymous memory,
1881 * so equivalent.
1882 */
1883 } else {
1884 RETURN(KERN_NO_SPACE);
1885 }
1886 }
1887 }
1888
1889 tmp_offset += entry->vme_end - entry->vme_start;
1890 tmp_start += entry->vme_end - entry->vme_start;
1891 if (entry->vme_end >= end) {
1892 /* reached the end of our mapping */
1893 break;
1894 }
1895 }
1896 /* it all matches: let's use what's already there ! */
1897 RETURN(KERN_MEMORY_PRESENT);
1898 }
1899
1900 /*
1901 * ... the next region doesn't overlap the
1902 * end point.
1903 */
1904
1905 if ((entry->vme_next != vm_map_to_entry(map)) &&
1906 (entry->vme_next->vme_start < end))
1907 RETURN(KERN_NO_SPACE);
1908 }
1909
1910 /*
1911 * At this point,
1912 * "start" and "end" should define the endpoints of the
1913 * available new range, and
1914 * "entry" should refer to the region before the new
1915 * range, and
1916 *
1917 * the map should be locked.
1918 */
1919
1920 /*
1921 * See whether we can avoid creating a new entry (and object) by
1922 * extending one of our neighbors. [So far, we only attempt to
1923 * extend from below.] Note that we can never extend/join
1924 * purgable objects because they need to remain distinct
1925 * entities in order to implement their "volatile object"
1926 * semantics.
1927 */
1928
1929 if (purgable) {
1930 if (object == VM_OBJECT_NULL) {
1931 object = vm_object_allocate(size);
1932 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
1933 object->purgable = VM_PURGABLE_NONVOLATILE;
1934 offset = (vm_object_offset_t)0;
1935 }
1936 } else if ((is_submap == FALSE) &&
1937 (object == VM_OBJECT_NULL) &&
1938 (entry != vm_map_to_entry(map)) &&
1939 (entry->vme_end == start) &&
1940 (!entry->is_shared) &&
1941 (!entry->is_sub_map) &&
1942 (entry->alias == alias) &&
1943 (entry->inheritance == inheritance) &&
1944 (entry->protection == cur_protection) &&
1945 (entry->max_protection == max_protection) &&
1946 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
1947 (entry->in_transition == 0) &&
1948 (entry->no_cache == no_cache) &&
1949 ((entry->vme_end - entry->vme_start) + size <=
1950 (alias == VM_MEMORY_REALLOC ?
1951 ANON_CHUNK_SIZE :
1952 NO_COALESCE_LIMIT)) &&
1953 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
1954 if (vm_object_coalesce(entry->object.vm_object,
1955 VM_OBJECT_NULL,
1956 entry->offset,
1957 (vm_object_offset_t) 0,
1958 (vm_map_size_t)(entry->vme_end - entry->vme_start),
1959 (vm_map_size_t)(end - entry->vme_end))) {
1960
1961 /*
1962 * Coalesced the two objects - can extend
1963 * the previous map entry to include the
1964 * new range.
1965 */
1966 map->size += (end - entry->vme_end);
1967 entry->vme_end = end;
1968 UPDATE_FIRST_FREE(map, map->first_free);
1969 RETURN(KERN_SUCCESS);
1970 }
1971 }
1972
1973 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
1974 new_entry = NULL;
1975
1976 for (tmp2_start = start; tmp2_start<end; tmp2_start += step) {
1977 tmp2_end = tmp2_start + step;
1978 /*
1979 * Create a new entry
1980 * LP64todo - for now, we can only allocate 4GB internal objects
1981 * because the default pager can't page bigger ones. Remove this
1982 * when it can.
1983 *
1984 * XXX FBDP
1985 * The reserved "page zero" in each process's address space can
1986 * be arbitrarily large. Splitting it into separate 4GB objects and
1987 * therefore different VM map entries serves no purpose and just
1988 * slows down operations on the VM map, so let's not split the
1989 * allocation into 4GB chunks if the max protection is NONE. That
1990 * memory should never be accessible, so it will never get to the
1991 * default pager.
1992 */
1993 tmp_start = tmp2_start;
1994 if (object == VM_OBJECT_NULL &&
1995 size > (vm_map_size_t)ANON_CHUNK_SIZE &&
1996 max_protection != VM_PROT_NONE &&
1997 superpage_size == 0)
1998 tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE;
1999 else
2000 tmp_end = tmp2_end;
2001 do {
2002 new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
2003 object, offset, needs_copy,
2004 FALSE, FALSE,
2005 cur_protection, max_protection,
2006 VM_BEHAVIOR_DEFAULT,
2007 inheritance, 0, no_cache,
2008 permanent, superpage_size);
2009 new_entry->alias = alias;
2010 if (is_submap) {
2011 vm_map_t submap;
2012 boolean_t submap_is_64bit;
2013 boolean_t use_pmap;
2014
2015 new_entry->is_sub_map = TRUE;
2016 submap = (vm_map_t) object;
2017 submap_is_64bit = vm_map_is_64bit(submap);
2018 use_pmap = (alias == VM_MEMORY_SHARED_PMAP);
2019 #ifndef NO_NESTED_PMAP
2020 if (use_pmap && submap->pmap == NULL) {
2021 /* we need a sub pmap to nest... */
2022 submap->pmap = pmap_create(0, submap_is_64bit);
2023 if (submap->pmap == NULL) {
2024 /* let's proceed without nesting... */
2025 }
2026 }
2027 if (use_pmap && submap->pmap != NULL) {
2028 kr = pmap_nest(map->pmap,
2029 submap->pmap,
2030 tmp_start,
2031 tmp_start,
2032 tmp_end - tmp_start);
2033 if (kr != KERN_SUCCESS) {
2034 printf("vm_map_enter: "
2035 "pmap_nest(0x%llx,0x%llx) "
2036 "error 0x%x\n",
2037 (long long)tmp_start,
2038 (long long)tmp_end,
2039 kr);
2040 } else {
2041 /* we're now nested ! */
2042 new_entry->use_pmap = TRUE;
2043 pmap_empty = FALSE;
2044 }
2045 }
2046 #endif /* NO_NESTED_PMAP */
2047 }
2048 entry = new_entry;
2049
2050 if (superpage_size) {
2051 vm_page_t pages, m;
2052 vm_object_t sp_object;
2053
2054 entry->offset = 0;
2055
2056 /* allocate one superpage */
2057 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
2058 if (kr != KERN_SUCCESS) {
2059 new_mapping_established = TRUE; /* will cause deallocation of whole range */
2060 RETURN(kr);
2061 }
2062
2063 /* create one vm_object per superpage */
2064 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
2065 sp_object->phys_contiguous = TRUE;
2066 sp_object->shadow_offset = (vm_object_offset_t)pages->phys_page*PAGE_SIZE;
2067 entry->object.vm_object = sp_object;
2068
2069 /* enter the base pages into the object */
2070 vm_object_lock(sp_object);
2071 for (offset = 0; offset < SUPERPAGE_SIZE; offset += PAGE_SIZE) {
2072 m = pages;
2073 pmap_zero_page(m->phys_page);
2074 pages = NEXT_PAGE(m);
2075 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2076 vm_page_insert(m, sp_object, offset);
2077 }
2078 vm_object_unlock(sp_object);
2079 }
2080 } while (tmp_end != tmp2_end &&
2081 (tmp_start = tmp_end) &&
2082 (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ?
2083 tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end));
2084 }
2085
2086 vm_map_unlock(map);
2087 map_locked = FALSE;
2088
2089 new_mapping_established = TRUE;
2090
2091 /* Wire down the new entry if the user
2092 * requested all new map entries be wired.
2093 */
2094 if ((map->wiring_required)||(superpage_size)) {
2095 pmap_empty = FALSE; /* pmap won't be empty */
2096 result = vm_map_wire(map, start, end,
2097 new_entry->protection, TRUE);
2098 RETURN(result);
2099 }
2100
2101 if ((object != VM_OBJECT_NULL) &&
2102 (vm_map_pmap_enter_enable) &&
2103 (!anywhere) &&
2104 (!needs_copy) &&
2105 (size < (128*1024))) {
2106 pmap_empty = FALSE; /* pmap won't be empty */
2107
2108 if (override_nx(map, alias) && cur_protection)
2109 cur_protection |= VM_PROT_EXECUTE;
2110
2111 vm_map_pmap_enter(map, start, end,
2112 object, offset, cur_protection);
2113 }
2114
2115 BailOut: ;
2116 if (result == KERN_SUCCESS) {
2117 vm_prot_t pager_prot;
2118 memory_object_t pager;
2119
2120 if (pmap_empty &&
2121 !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
2122 assert(vm_map_pmap_is_empty(map,
2123 *address,
2124 *address+size));
2125 }
2126
2127 /*
2128 * For "named" VM objects, let the pager know that the
2129 * memory object is being mapped. Some pagers need to keep
2130 * track of this, to know when they can reclaim the memory
2131 * object, for example.
2132 * VM calls memory_object_map() for each mapping (specifying
2133 * the protection of each mapping) and calls
2134 * memory_object_last_unmap() when all the mappings are gone.
2135 */
2136 pager_prot = max_protection;
2137 if (needs_copy) {
2138 /*
2139 * Copy-On-Write mapping: won't modify
2140 * the memory object.
2141 */
2142 pager_prot &= ~VM_PROT_WRITE;
2143 }
2144 if (!is_submap &&
2145 object != VM_OBJECT_NULL &&
2146 object->named &&
2147 object->pager != MEMORY_OBJECT_NULL) {
2148 vm_object_lock(object);
2149 pager = object->pager;
2150 if (object->named &&
2151 pager != MEMORY_OBJECT_NULL) {
2152 assert(object->pager_ready);
2153 vm_object_mapping_wait(object, THREAD_UNINT);
2154 vm_object_mapping_begin(object);
2155 vm_object_unlock(object);
2156
2157 kr = memory_object_map(pager, pager_prot);
2158 assert(kr == KERN_SUCCESS);
2159
2160 vm_object_lock(object);
2161 vm_object_mapping_end(object);
2162 }
2163 vm_object_unlock(object);
2164 }
2165 } else {
2166 if (new_mapping_established) {
2167 /*
2168 * We have to get rid of the new mappings since we
2169 * won't make them available to the user.
2170 * Try to do that atomically, to minimize the risk
2171 * that someone else creates new mappings in that range.
2172 */
2173 zap_new_map = vm_map_create(PMAP_NULL,
2174 *address,
2175 *address + size,
2176 map->hdr.entries_pageable);
2177 if (!map_locked) {
2178 vm_map_lock(map);
2179 map_locked = TRUE;
2180 }
2181 (void) vm_map_delete(map, *address, *address+size,
2182 VM_MAP_REMOVE_SAVE_ENTRIES,
2183 zap_new_map);
2184 }
2185 if (zap_old_map != VM_MAP_NULL &&
2186 zap_old_map->hdr.nentries != 0) {
2187 vm_map_entry_t entry1, entry2;
2188
2189 /*
2190 * The new mapping failed. Attempt to restore
2191 * the old mappings, saved in the "zap_old_map".
2192 */
2193 if (!map_locked) {
2194 vm_map_lock(map);
2195 map_locked = TRUE;
2196 }
2197
2198 /* first check if the coast is still clear */
2199 start = vm_map_first_entry(zap_old_map)->vme_start;
2200 end = vm_map_last_entry(zap_old_map)->vme_end;
2201 if (vm_map_lookup_entry(map, start, &entry1) ||
2202 vm_map_lookup_entry(map, end, &entry2) ||
2203 entry1 != entry2) {
2204 /*
2205 * Part of that range has already been
2206 * re-mapped: we can't restore the old
2207 * mappings...
2208 */
2209 vm_map_enter_restore_failures++;
2210 } else {
2211 /*
2212 * Transfer the saved map entries from
2213 * "zap_old_map" to the original "map",
2214 * inserting them all after "entry1".
2215 */
2216 for (entry2 = vm_map_first_entry(zap_old_map);
2217 entry2 != vm_map_to_entry(zap_old_map);
2218 entry2 = vm_map_first_entry(zap_old_map)) {
2219 vm_map_size_t entry_size;
2220
2221 entry_size = (entry2->vme_end -
2222 entry2->vme_start);
2223 vm_map_entry_unlink(zap_old_map,
2224 entry2);
2225 zap_old_map->size -= entry_size;
2226 vm_map_entry_link(map, entry1, entry2);
2227 map->size += entry_size;
2228 entry1 = entry2;
2229 }
2230 if (map->wiring_required) {
2231 /*
2232 * XXX TODO: we should rewire the
2233 * old pages here...
2234 */
2235 }
2236 vm_map_enter_restore_successes++;
2237 }
2238 }
2239 }
2240
2241 if (map_locked) {
2242 vm_map_unlock(map);
2243 }
2244
2245 /*
2246 * Get rid of the "zap_maps" and all the map entries that
2247 * they may still contain.
2248 */
2249 if (zap_old_map != VM_MAP_NULL) {
2250 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2251 zap_old_map = VM_MAP_NULL;
2252 }
2253 if (zap_new_map != VM_MAP_NULL) {
2254 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2255 zap_new_map = VM_MAP_NULL;
2256 }
2257
2258 return result;
2259
2260 #undef RETURN
2261 }
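/*
 * A minimal usage sketch for vm_map_enter(), assuming a kernel-internal
 * caller that wants anonymous, pageable memory placed anywhere in "map".
 * The local names ("addr", "len", "requested_size", "kr") are placeholders;
 * the flags and protections shown are simply the common defaults used
 * elsewhere in this file.
 *
 *	vm_map_offset_t	addr = 0;
 *	vm_map_size_t	len = vm_map_round_page(requested_size);
 *	kern_return_t	kr;
 *
 *	kr = vm_map_enter(map, &addr, len,
 *			  (vm_map_offset_t) 0,		// no alignment mask
 *			  VM_FLAGS_ANYWHERE,		// let the map choose "addr"
 *			  VM_OBJECT_NULL,		// anonymous memory
 *			  (vm_object_offset_t) 0,
 *			  FALSE,			// needs_copy
 *			  VM_PROT_READ | VM_PROT_WRITE,	// cur_protection
 *			  VM_PROT_ALL,			// max_protection
 *			  VM_INHERIT_DEFAULT);
 *
 * On KERN_SUCCESS, "addr" holds the start of the new mapping.
 */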
2262
2263 kern_return_t
2264 vm_map_enter_mem_object(
2265 vm_map_t target_map,
2266 vm_map_offset_t *address,
2267 vm_map_size_t initial_size,
2268 vm_map_offset_t mask,
2269 int flags,
2270 ipc_port_t port,
2271 vm_object_offset_t offset,
2272 boolean_t copy,
2273 vm_prot_t cur_protection,
2274 vm_prot_t max_protection,
2275 vm_inherit_t inheritance)
2276 {
2277 vm_map_address_t map_addr;
2278 vm_map_size_t map_size;
2279 vm_object_t object;
2280 vm_object_size_t size;
2281 kern_return_t result;
2282
2283 /*
2284 * Check arguments for validity
2285 */
2286 if ((target_map == VM_MAP_NULL) ||
2287 (cur_protection & ~VM_PROT_ALL) ||
2288 (max_protection & ~VM_PROT_ALL) ||
2289 (inheritance > VM_INHERIT_LAST_VALID) ||
2290 initial_size == 0)
2291 return KERN_INVALID_ARGUMENT;
2292
2293 map_addr = vm_map_trunc_page(*address);
2294 map_size = vm_map_round_page(initial_size);
2295 size = vm_object_round_page(initial_size);
2296
2297 /*
2298 * Find the vm object (if any) corresponding to this port.
2299 */
2300 if (!IP_VALID(port)) {
2301 object = VM_OBJECT_NULL;
2302 offset = 0;
2303 copy = FALSE;
2304 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
2305 vm_named_entry_t named_entry;
2306
2307 named_entry = (vm_named_entry_t) port->ip_kobject;
2308 /* a few checks to make sure user is obeying rules */
2309 if (size == 0) {
2310 if (offset >= named_entry->size)
2311 return KERN_INVALID_RIGHT;
2312 size = named_entry->size - offset;
2313 }
2314 if ((named_entry->protection & max_protection) !=
2315 max_protection)
2316 return KERN_INVALID_RIGHT;
2317 if ((named_entry->protection & cur_protection) !=
2318 cur_protection)
2319 return KERN_INVALID_RIGHT;
2320 if (named_entry->size < (offset + size))
2321 return KERN_INVALID_ARGUMENT;
2322
2323 /* the caller's "offset" parameter is relative to the start of the */
2324 /* named entry; convert it to an offset within the backing object */
2325 offset = offset + named_entry->offset;
2326
2327 named_entry_lock(named_entry);
2328 if (named_entry->is_sub_map) {
2329 vm_map_t submap;
2330
2331 submap = named_entry->backing.map;
2332 vm_map_lock(submap);
2333 vm_map_reference(submap);
2334 vm_map_unlock(submap);
2335 named_entry_unlock(named_entry);
2336
2337 result = vm_map_enter(target_map,
2338 &map_addr,
2339 map_size,
2340 mask,
2341 flags | VM_FLAGS_SUBMAP,
2342 (vm_object_t) submap,
2343 offset,
2344 copy,
2345 cur_protection,
2346 max_protection,
2347 inheritance);
2348 if (result != KERN_SUCCESS) {
2349 vm_map_deallocate(submap);
2350 } else {
2351 /*
2352 * No need to lock "submap" just to check its
2353 * "mapped" flag: that flag is never reset
2354 * once it's been set and if we race, we'll
2355 * just end up setting it twice, which is OK.
2356 */
2357 if (submap->mapped == FALSE) {
2358 /*
2359 * This submap has never been mapped.
2360 * Set its "mapped" flag now that it
2361 * has been mapped.
2362 * This happens only for the first ever
2363 * mapping of a "submap".
2364 */
2365 vm_map_lock(submap);
2366 submap->mapped = TRUE;
2367 vm_map_unlock(submap);
2368 }
2369 *address = map_addr;
2370 }
2371 return result;
2372
2373 } else if (named_entry->is_pager) {
2374 unsigned int access;
2375 vm_prot_t protections;
2376 unsigned int wimg_mode;
2377 boolean_t cache_attr;
2378
2379 protections = named_entry->protection & VM_PROT_ALL;
2380 access = GET_MAP_MEM(named_entry->protection);
2381
2382 object = vm_object_enter(named_entry->backing.pager,
2383 named_entry->size,
2384 named_entry->internal,
2385 FALSE,
2386 FALSE);
2387 if (object == VM_OBJECT_NULL) {
2388 named_entry_unlock(named_entry);
2389 return KERN_INVALID_OBJECT;
2390 }
2391
2392 /* JMM - drop reference on pager here */
2393
2394 /* create an extra ref for the named entry */
2395 vm_object_lock(object);
2396 vm_object_reference_locked(object);
2397 named_entry->backing.object = object;
2398 named_entry->is_pager = FALSE;
2399 named_entry_unlock(named_entry);
2400
2401 wimg_mode = object->wimg_bits;
2402 if (access == MAP_MEM_IO) {
2403 wimg_mode = VM_WIMG_IO;
2404 } else if (access == MAP_MEM_COPYBACK) {
2405 wimg_mode = VM_WIMG_USE_DEFAULT;
2406 } else if (access == MAP_MEM_WTHRU) {
2407 wimg_mode = VM_WIMG_WTHRU;
2408 } else if (access == MAP_MEM_WCOMB) {
2409 wimg_mode = VM_WIMG_WCOMB;
2410 }
2411 if (wimg_mode == VM_WIMG_IO ||
2412 wimg_mode == VM_WIMG_WCOMB)
2413 cache_attr = TRUE;
2414 else
2415 cache_attr = FALSE;
2416
2417 /* wait for object (if any) to be ready */
2418 if (!named_entry->internal) {
2419 while (!object->pager_ready) {
2420 vm_object_wait(
2421 object,
2422 VM_OBJECT_EVENT_PAGER_READY,
2423 THREAD_UNINT);
2424 vm_object_lock(object);
2425 }
2426 }
2427
2428 if (object->wimg_bits != wimg_mode) {
2429 vm_page_t p;
2430
2431 vm_object_paging_wait(object, THREAD_UNINT);
2432
2433 object->wimg_bits = wimg_mode;
2434 queue_iterate(&object->memq, p, vm_page_t, listq) {
2435 if (!p->fictitious) {
2436 if (p->pmapped)
2437 pmap_disconnect(p->phys_page);
2438 if (cache_attr)
2439 pmap_sync_page_attributes_phys(p->phys_page);
2440 }
2441 }
2442 }
2443 object->true_share = TRUE;
2444 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
2445 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
2446 vm_object_unlock(object);
2447 } else {
2448 /* This is the case where we are going to map */
2449 /* an already mapped object. If the object is */
2450 /* not ready it is internal. An external */
2451 /* object cannot be mapped until it is ready, */
2452 /* so we can avoid the ready check */
2453 /* in this case. */
2454 object = named_entry->backing.object;
2455 assert(object != VM_OBJECT_NULL);
2456 named_entry_unlock(named_entry);
2457 vm_object_reference(object);
2458 }
2459 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
2460 /*
2461 * JMM - This is temporary until we unify named entries
2462 * and raw memory objects.
2463 *
2464 * Detected fake ip_kotype for a memory object. In
2465 * this case, the port isn't really a port at all, but
2466 * instead is just a raw memory object.
2467 */
2468
2469 object = vm_object_enter((memory_object_t)port,
2470 size, FALSE, FALSE, FALSE);
2471 if (object == VM_OBJECT_NULL)
2472 return KERN_INVALID_OBJECT;
2473
2474 /* wait for object (if any) to be ready */
2475 if (object != VM_OBJECT_NULL) {
2476 if (object == kernel_object) {
2477 printf("Warning: Attempt to map kernel object"
2478 " by a non-private kernel entity\n");
2479 return KERN_INVALID_OBJECT;
2480 }
2481 if (!object->pager_ready) {
2482 vm_object_lock(object);
2483
2484 while (!object->pager_ready) {
2485 vm_object_wait(object,
2486 VM_OBJECT_EVENT_PAGER_READY,
2487 THREAD_UNINT);
2488 vm_object_lock(object);
2489 }
2490 vm_object_unlock(object);
2491 }
2492 }
2493 } else {
2494 return KERN_INVALID_OBJECT;
2495 }
2496
2497 if (object != VM_OBJECT_NULL &&
2498 object->named &&
2499 object->pager != MEMORY_OBJECT_NULL &&
2500 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2501 memory_object_t pager;
2502 vm_prot_t pager_prot;
2503 kern_return_t kr;
2504
2505 /*
2506 * For "named" VM objects, let the pager know that the
2507 * memory object is being mapped. Some pagers need to keep
2508 * track of this, to know when they can reclaim the memory
2509 * object, for example.
2510 * VM calls memory_object_map() for each mapping (specifying
2511 * the protection of each mapping) and calls
2512 * memory_object_last_unmap() when all the mappings are gone.
2513 */
2514 pager_prot = max_protection;
2515 if (copy) {
2516 /*
2517 * Copy-On-Write mapping: won't modify the
2518 * memory object.
2519 */
2520 pager_prot &= ~VM_PROT_WRITE;
2521 }
2522 vm_object_lock(object);
2523 pager = object->pager;
2524 if (object->named &&
2525 pager != MEMORY_OBJECT_NULL &&
2526 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2527 assert(object->pager_ready);
2528 vm_object_mapping_wait(object, THREAD_UNINT);
2529 vm_object_mapping_begin(object);
2530 vm_object_unlock(object);
2531
2532 kr = memory_object_map(pager, pager_prot);
2533 assert(kr == KERN_SUCCESS);
2534
2535 vm_object_lock(object);
2536 vm_object_mapping_end(object);
2537 }
2538 vm_object_unlock(object);
2539 }
2540
2541 /*
2542 * Perform the copy if requested
2543 */
2544
2545 if (copy) {
2546 vm_object_t new_object;
2547 vm_object_offset_t new_offset;
2548
2549 result = vm_object_copy_strategically(object, offset, size,
2550 &new_object, &new_offset,
2551 &copy);
2552
2553
2554 if (result == KERN_MEMORY_RESTART_COPY) {
2555 boolean_t success;
2556 boolean_t src_needs_copy;
2557
2558 /*
2559 * XXX
2560 * We currently ignore src_needs_copy.
2561 * This really is the issue of how to make
2562 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
2563 * non-kernel users to use. Solution forthcoming.
2564 * In the meantime, since we don't allow non-kernel
2565 * memory managers to specify symmetric copy,
2566 * we won't run into problems here.
2567 */
2568 new_object = object;
2569 new_offset = offset;
2570 success = vm_object_copy_quickly(&new_object,
2571 new_offset, size,
2572 &src_needs_copy,
2573 &copy);
2574 assert(success);
2575 result = KERN_SUCCESS;
2576 }
2577 /*
2578 * Throw away the reference to the
2579 * original object, as it won't be mapped.
2580 */
2581
2582 vm_object_deallocate(object);
2583
2584 if (result != KERN_SUCCESS)
2585 return result;
2586
2587 object = new_object;
2588 offset = new_offset;
2589 }
2590
2591 result = vm_map_enter(target_map,
2592 &map_addr, map_size,
2593 (vm_map_offset_t)mask,
2594 flags,
2595 object, offset,
2596 copy,
2597 cur_protection, max_protection, inheritance);
2598 if (result != KERN_SUCCESS)
2599 vm_object_deallocate(object);
2600 *address = map_addr;
2601 return result;
2602 }
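/*
 * A hedged usage sketch for vm_map_enter_mem_object(): mapping a named
 * entry port copy-on-write.  "mem_entry_port" is a placeholder for an
 * ipc_port_t the caller obtained elsewhere (e.g. via
 * mach_make_memory_entry_64); "addr", "size" and "kr" are placeholders
 * as well.
 *
 *	kr = vm_map_enter_mem_object(target_map, &addr, size,
 *				     (vm_map_offset_t) 0,
 *				     VM_FLAGS_ANYWHERE,
 *				     mem_entry_port,
 *				     (vm_object_offset_t) 0,
 *				     TRUE,		// copy (copy-on-write)
 *				     VM_PROT_READ,
 *				     VM_PROT_READ | VM_PROT_WRITE,
 *				     VM_INHERIT_DEFAULT);
 *
 * Note from the checks above that the named entry's protection must
 * cover both "cur_protection" and "max_protection", otherwise the call
 * fails with KERN_INVALID_RIGHT.
 */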
2603
2604
2605
2606
2607 kern_return_t
2608 vm_map_enter_mem_object_control(
2609 vm_map_t target_map,
2610 vm_map_offset_t *address,
2611 vm_map_size_t initial_size,
2612 vm_map_offset_t mask,
2613 int flags,
2614 memory_object_control_t control,
2615 vm_object_offset_t offset,
2616 boolean_t copy,
2617 vm_prot_t cur_protection,
2618 vm_prot_t max_protection,
2619 vm_inherit_t inheritance)
2620 {
2621 vm_map_address_t map_addr;
2622 vm_map_size_t map_size;
2623 vm_object_t object;
2624 vm_object_size_t size;
2625 kern_return_t result;
2626 memory_object_t pager;
2627 vm_prot_t pager_prot;
2628 kern_return_t kr;
2629
2630 /*
2631 * Check arguments for validity
2632 */
2633 if ((target_map == VM_MAP_NULL) ||
2634 (cur_protection & ~VM_PROT_ALL) ||
2635 (max_protection & ~VM_PROT_ALL) ||
2636 (inheritance > VM_INHERIT_LAST_VALID) ||
2637 initial_size == 0)
2638 return KERN_INVALID_ARGUMENT;
2639
2640 map_addr = vm_map_trunc_page(*address);
2641 map_size = vm_map_round_page(initial_size);
2642 size = vm_object_round_page(initial_size);
2643
2644 object = memory_object_control_to_vm_object(control);
2645
2646 if (object == VM_OBJECT_NULL)
2647 return KERN_INVALID_OBJECT;
2648
2649 if (object == kernel_object) {
2650 printf("Warning: Attempt to map kernel object"
2651 " by a non-private kernel entity\n");
2652 return KERN_INVALID_OBJECT;
2653 }
2654
2655 vm_object_lock(object);
2656 object->ref_count++;
2657 vm_object_res_reference(object);
2658
2659 /*
2660 * For "named" VM objects, let the pager know that the
2661 * memory object is being mapped. Some pagers need to keep
2662 * track of this, to know when they can reclaim the memory
2663 * object, for example.
2664 * VM calls memory_object_map() for each mapping (specifying
2665 * the protection of each mapping) and calls
2666 * memory_object_last_unmap() when all the mappings are gone.
2667 */
2668 pager_prot = max_protection;
2669 if (copy) {
2670 pager_prot &= ~VM_PROT_WRITE;
2671 }
2672 pager = object->pager;
2673 if (object->named &&
2674 pager != MEMORY_OBJECT_NULL &&
2675 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2676 assert(object->pager_ready);
2677 vm_object_mapping_wait(object, THREAD_UNINT);
2678 vm_object_mapping_begin(object);
2679 vm_object_unlock(object);
2680
2681 kr = memory_object_map(pager, pager_prot);
2682 assert(kr == KERN_SUCCESS);
2683
2684 vm_object_lock(object);
2685 vm_object_mapping_end(object);
2686 }
2687 vm_object_unlock(object);
2688
2689 /*
2690 * Perform the copy if requested
2691 */
2692
2693 if (copy) {
2694 vm_object_t new_object;
2695 vm_object_offset_t new_offset;
2696
2697 result = vm_object_copy_strategically(object, offset, size,
2698 &new_object, &new_offset,
2699 &copy);
2700
2701
2702 if (result == KERN_MEMORY_RESTART_COPY) {
2703 boolean_t success;
2704 boolean_t src_needs_copy;
2705
2706 /*
2707 * XXX
2708 * We currently ignore src_needs_copy.
2709 * This really is the issue of how to make
2710 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
2711 * non-kernel users to use. Solution forthcoming.
2712 * In the meantime, since we don't allow non-kernel
2713 * memory managers to specify symmetric copy,
2714 * we won't run into problems here.
2715 */
2716 new_object = object;
2717 new_offset = offset;
2718 success = vm_object_copy_quickly(&new_object,
2719 new_offset, size,
2720 &src_needs_copy,
2721 &copy);
2722 assert(success);
2723 result = KERN_SUCCESS;
2724 }
2725 /*
2726 * Throw away the reference to the
2727 * original object, as it won't be mapped.
2728 */
2729
2730 vm_object_deallocate(object);
2731
2732 if (result != KERN_SUCCESS)
2733 return result;
2734
2735 object = new_object;
2736 offset = new_offset;
2737 }
2738
2739 result = vm_map_enter(target_map,
2740 &map_addr, map_size,
2741 (vm_map_offset_t)mask,
2742 flags,
2743 object, offset,
2744 copy,
2745 cur_protection, max_protection, inheritance);
2746 if (result != KERN_SUCCESS)
2747 vm_object_deallocate(object);
2748 *address = map_addr;
2749
2750 return result;
2751 }
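/*
 * Sketch of the control-based variant above, under the assumption that
 * the caller already holds a memory_object_control_t ("ctrl" is a
 * placeholder, as are "addr", "size" and "kr").  It behaves like
 * vm_map_enter_mem_object() but takes the control directly instead of
 * resolving a port:
 *
 *	kr = vm_map_enter_mem_object_control(target_map, &addr, size,
 *					     (vm_map_offset_t) 0,
 *					     VM_FLAGS_ANYWHERE,
 *					     ctrl,
 *					     (vm_object_offset_t) 0,
 *					     FALSE,		// no copy
 *					     VM_PROT_READ,
 *					     VM_PROT_READ,
 *					     VM_INHERIT_DEFAULT);
 */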
2752
2753
2754 #if VM_CPM
2755
2756 #ifdef MACH_ASSERT
2757 extern pmap_paddr_t avail_start, avail_end;
2758 #endif
2759
2760 /*
2761 * Allocate memory in the specified map, with the caveat that
2762 * the memory is physically contiguous. This call may fail
2763 * if the system can't find sufficient contiguous memory.
2764 * This call may cause or lead to heart-stopping amounts of
2765 * paging activity.
2766 *
2767 * Memory obtained from this call should be freed in the
2768 * normal way, viz., via vm_deallocate.
2769 */
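/*
 * A minimal usage sketch, assuming "map" and "size" come from the
 * caller and that VM_CPM is configured:
 *
 *	vm_map_offset_t	addr = 0;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_enter_cpm(map, &addr, size, VM_FLAGS_ANYWHERE);
 *	if (kr == KERN_SUCCESS) {
 *		// "addr" now names "size" bytes of wired, physically
 *		// contiguous memory; free it through the normal
 *		// deallocation path on the same map, as noted above.
 *	}
 */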
2770 kern_return_t
2771 vm_map_enter_cpm(
2772 vm_map_t map,
2773 vm_map_offset_t *addr,
2774 vm_map_size_t size,
2775 int flags)
2776 {
2777 vm_object_t cpm_obj;
2778 pmap_t pmap;
2779 vm_page_t m, pages;
2780 kern_return_t kr;
2781 vm_map_offset_t va, start, end, offset;
2782 #if MACH_ASSERT
2783 vm_map_offset_t prev_addr;
2784 #endif /* MACH_ASSERT */
2785
2786 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
2787
2788 if (!vm_allocate_cpm_enabled)
2789 return KERN_FAILURE;
2790
2791 if (size == 0) {
2792 *addr = 0;
2793 return KERN_SUCCESS;
2794 }
2795 if (anywhere)
2796 *addr = vm_map_min(map);
2797 else
2798 *addr = vm_map_trunc_page(*addr);
2799 size = vm_map_round_page(size);
2800
2801 /*
2802 * LP64todo - cpm_allocate should probably allow
2803 * allocations of >4GB, but not with the current
2804 * algorithm, so just cast down the size for now.
2805 */
2806 if (size > VM_MAX_ADDRESS)
2807 return KERN_RESOURCE_SHORTAGE;
2808 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
2809 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)
2810 return kr;
2811
2812 cpm_obj = vm_object_allocate((vm_object_size_t)size);
2813 assert(cpm_obj != VM_OBJECT_NULL);
2814 assert(cpm_obj->internal);
2815 assert(cpm_obj->size == (vm_object_size_t)size);
2816 assert(cpm_obj->can_persist == FALSE);
2817 assert(cpm_obj->pager_created == FALSE);
2818 assert(cpm_obj->pageout == FALSE);
2819 assert(cpm_obj->shadow == VM_OBJECT_NULL);
2820
2821 /*
2822 * Insert pages into object.
2823 */
2824
2825 vm_object_lock(cpm_obj);
2826 for (offset = 0; offset < size; offset += PAGE_SIZE) {
2827 m = pages;
2828 pages = NEXT_PAGE(m);
2829 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2830
2831 assert(!m->gobbled);
2832 assert(!m->wanted);
2833 assert(!m->pageout);
2834 assert(!m->tabled);
2835 assert(VM_PAGE_WIRED(m));
2836 /*
2837 * ENCRYPTED SWAP:
2838 * "m" is not supposed to be pageable, so it
2839 * should not be encrypted. It wouldn't be safe
2840 * to enter it in a new VM object while encrypted.
2841 */
2842 ASSERT_PAGE_DECRYPTED(m);
2843 assert(m->busy);
2844 assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT));
2845
2846 m->busy = FALSE;
2847 vm_page_insert(m, cpm_obj, offset);
2848 }
2849 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
2850 vm_object_unlock(cpm_obj);
2851
2852 /*
2853 * Hang onto a reference on the object in case a
2854 * multi-threaded application for some reason decides
2855 * to deallocate the portion of the address space into
2856 * which we will insert this object.
2857 *
2858 * Unfortunately, we must insert the object now before
2859 * we can talk to the pmap module about which addresses
2860 * must be wired down. Hence, the race with a multi-
2861 * threaded app.
2862 */
2863 vm_object_reference(cpm_obj);
2864
2865 /*
2866 * Insert object into map.
2867 */
2868
2869 kr = vm_map_enter(
2870 map,
2871 addr,
2872 size,
2873 (vm_map_offset_t)0,
2874 flags,
2875 cpm_obj,
2876 (vm_object_offset_t)0,
2877 FALSE,
2878 VM_PROT_ALL,
2879 VM_PROT_ALL,
2880 VM_INHERIT_DEFAULT);
2881
2882 if (kr != KERN_SUCCESS) {
2883 /*
2884 * A CPM object doesn't have can_persist set,
2885 * so all we have to do is deallocate it to
2886 * free up these pages.
2887 */
2888 assert(cpm_obj->pager_created == FALSE);
2889 assert(cpm_obj->can_persist == FALSE);
2890 assert(cpm_obj->pageout == FALSE);
2891 assert(cpm_obj->shadow == VM_OBJECT_NULL);
2892 vm_object_deallocate(cpm_obj); /* kill acquired ref */
2893 vm_object_deallocate(cpm_obj); /* kill creation ref */
2894 }
2895
2896 /*
2897 * Inform the physical mapping system that the
2898 * range of addresses may not fault, so that
2899 * page tables and such can be locked down as well.
2900 */
2901 start = *addr;
2902 end = start + size;
2903 pmap = vm_map_pmap(map);
2904 pmap_pageable(pmap, start, end, FALSE);
2905
2906 /*
2907 * Enter each page into the pmap, to avoid faults.
2908 * Note that this loop could be coded more efficiently,
2909 * if the need arose, rather than looking up each page
2910 * again.
2911 */
2912 for (offset = 0, va = start; offset < size;
2913 va += PAGE_SIZE, offset += PAGE_SIZE) {
2914 int type_of_fault;
2915
2916 vm_object_lock(cpm_obj);
2917 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2918 assert(m != VM_PAGE_NULL);
2919
2920 vm_page_zero_fill(m);
2921
2922 type_of_fault = DBG_ZERO_FILL_FAULT;
2923
2924 vm_fault_enter(m, pmap, va, VM_PROT_ALL,
2925 VM_PAGE_WIRED(m), FALSE, FALSE,
2926 &type_of_fault);
2927
2928 vm_object_unlock(cpm_obj);
2929 }
2930
2931 #if MACH_ASSERT
2932 /*
2933 * Verify ordering in address space.
2934 */
2935 for (offset = 0; offset < size; offset += PAGE_SIZE) {
2936 vm_object_lock(cpm_obj);
2937 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2938 vm_object_unlock(cpm_obj);
2939 if (m == VM_PAGE_NULL)
2940 panic("vm_allocate_cpm: obj 0x%x off 0x%x no page",
2941 cpm_obj, offset);
2942 assert(m->tabled);
2943 assert(!m->busy);
2944 assert(!m->wanted);
2945 assert(!m->fictitious);
2946 assert(!m->private);
2947 assert(!m->absent);
2948 assert(!m->error);
2949 assert(!m->cleaning);
2950 assert(!m->precious);
2951 assert(!m->clustered);
2952 if (offset != 0) {
2953 if (m->phys_page != prev_addr + 1) {
2954 printf("start 0x%x end 0x%x va 0x%x\n",
2955 start, end, va);
2956 printf("obj 0x%x off 0x%x\n", cpm_obj, offset);
2957 printf("m 0x%x prev_address 0x%x\n", m,
2958 prev_addr);
2959 panic("vm_allocate_cpm: pages not contig!");
2960 }
2961 }
2962 prev_addr = m->phys_page;
2963 }
2964 #endif /* MACH_ASSERT */
2965
2966 vm_object_deallocate(cpm_obj); /* kill extra ref */
2967
2968 return kr;
2969 }
2970
2971
2972 #else /* VM_CPM */
2973
2974 /*
2975 * Interface is defined in all cases, but unless the kernel
2976 * is built explicitly for this option, the interface does
2977 * nothing.
2978 */
2979
2980 kern_return_t
2981 vm_map_enter_cpm(
2982 __unused vm_map_t map,
2983 __unused vm_map_offset_t *addr,
2984 __unused vm_map_size_t size,
2985 __unused int flags)
2986 {
2987 return KERN_FAILURE;
2988 }
2989 #endif /* VM_CPM */
2990
2991 /* Not used without nested pmaps */
2992 #ifndef NO_NESTED_PMAP
2993 /*
2994 * Clip and unnest a portion of a nested submap mapping.
2995 */
2996
2997
2998 static void
2999 vm_map_clip_unnest(
3000 vm_map_t map,
3001 vm_map_entry_t entry,
3002 vm_map_offset_t start_unnest,
3003 vm_map_offset_t end_unnest)
3004 {
3005 vm_map_offset_t old_start_unnest = start_unnest;
3006 vm_map_offset_t old_end_unnest = end_unnest;
3007
3008 assert(entry->is_sub_map);
3009 assert(entry->object.sub_map != NULL);
3010
3011 /*
3012 * Query the platform for the optimal unnest range.
3013 * DRK: There's some duplication of effort here, since
3014 * callers may have adjusted the range to some extent. This
3015 * routine was introduced to support 1GiB subtree nesting
3016 * for x86 platforms, which can also nest on 2MiB boundaries
3017 * depending on size/alignment.
3018 */
3019 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
3020 log_unnest_badness(map, old_start_unnest, old_end_unnest);
3021 }
3022
3023 if (entry->vme_start > start_unnest ||
3024 entry->vme_end < end_unnest) {
3025 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
3026 "bad nested entry: start=0x%llx end=0x%llx\n",
3027 (long long)start_unnest, (long long)end_unnest,
3028 (long long)entry->vme_start, (long long)entry->vme_end);
3029 }
3030
3031 if (start_unnest > entry->vme_start) {
3032 _vm_map_clip_start(&map->hdr,
3033 entry,
3034 start_unnest);
3035 UPDATE_FIRST_FREE(map, map->first_free);
3036 }
3037 if (entry->vme_end > end_unnest) {
3038 _vm_map_clip_end(&map->hdr,
3039 entry,
3040 end_unnest);
3041 UPDATE_FIRST_FREE(map, map->first_free);
3042 }
3043
3044 pmap_unnest(map->pmap,
3045 entry->vme_start,
3046 entry->vme_end - entry->vme_start);
3047 if ((map->mapped) && (map->ref_count)) {
3048 /* clean up parent map/maps */
3049 vm_map_submap_pmap_clean(
3050 map, entry->vme_start,
3051 entry->vme_end,
3052 entry->object.sub_map,
3053 entry->offset);
3054 }
3055 entry->use_pmap = FALSE;
3056 }
3057 #endif /* NO_NESTED_PMAP */
3058
3059 /*
3060 * vm_map_clip_start: [ internal use only ]
3061 *
3062 * Asserts that the given entry begins at or after
3063 * the specified address; if necessary,
3064 * it splits the entry into two.
3065 */
3066 static void
3067 vm_map_clip_start(
3068 vm_map_t map,
3069 vm_map_entry_t entry,
3070 vm_map_offset_t startaddr)
3071 {
3072 #ifndef NO_NESTED_PMAP
3073 if (entry->use_pmap &&
3074 startaddr >= entry->vme_start) {
3075 vm_map_offset_t start_unnest, end_unnest;
3076
3077 /*
3078 * Make sure "startaddr" is no longer in a nested range
3079 * before we clip. Unnest only the minimum range the platform
3080 * can handle.
3081 * vm_map_clip_unnest may perform additional adjustments to
3082 * the unnest range.
3083 */
3084 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
3085 end_unnest = start_unnest + pmap_nesting_size_min;
3086 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
3087 }
3088 #endif /* NO_NESTED_PMAP */
3089 if (startaddr > entry->vme_start) {
3090 if (entry->object.vm_object &&
3091 !entry->is_sub_map &&
3092 entry->object.vm_object->phys_contiguous) {
3093 pmap_remove(map->pmap,
3094 (addr64_t)(entry->vme_start),
3095 (addr64_t)(entry->vme_end));
3096 }
3097 _vm_map_clip_start(&map->hdr, entry, startaddr);
3098 UPDATE_FIRST_FREE(map, map->first_free);
3099 }
3100 }
3101
3102
3103 #define vm_map_copy_clip_start(copy, entry, startaddr) \
3104 MACRO_BEGIN \
3105 if ((startaddr) > (entry)->vme_start) \
3106 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
3107 MACRO_END
3108
3109 /*
3110 * This routine is called only when it is known that
3111 * the entry must be split.
3112 */
3113 static void
3114 _vm_map_clip_start(
3115 register struct vm_map_header *map_header,
3116 register vm_map_entry_t entry,
3117 register vm_map_offset_t start)
3118 {
3119 register vm_map_entry_t new_entry;
3120
3121 /*
3122 * Split off the front portion --
3123 * note that we must insert the new
3124 * entry BEFORE this one, so that
3125 * this entry has the specified starting
3126 * address.
3127 */
3128
3129 new_entry = _vm_map_entry_create(map_header);
3130 vm_map_entry_copy_full(new_entry, entry);
3131
3132 new_entry->vme_end = start;
3133 entry->offset += (start - entry->vme_start);
3134 entry->vme_start = start;
3135
3136 _vm_map_entry_link(map_header, entry->vme_prev, new_entry);
3137
3138 if (entry->is_sub_map)
3139 vm_map_reference(new_entry->object.sub_map);
3140 else
3141 vm_object_reference(new_entry->object.vm_object);
3142 }
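/*
 * Pictorially (hypothetical addresses): clipping an entry covering
 * [0x1000, 0x5000) at start address 0x3000 produces a new entry for
 * [0x1000, 0x3000), linked in front, while the original entry is
 * trimmed to [0x3000, 0x5000) and its object offset advanced by 0x2000.
 */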
3143
3144
3145 /*
3146 * vm_map_clip_end: [ internal use only ]
3147 *
3148 * Asserts that the given entry ends at or before
3149 * the specified address; if necessary,
3150 * it splits the entry into two.
3151 */
3152 static void
3153 vm_map_clip_end(
3154 vm_map_t map,
3155 vm_map_entry_t entry,
3156 vm_map_offset_t endaddr)
3157 {
3158 if (endaddr > entry->vme_end) {
3159 /*
3160 * Within the scope of this clipping, limit "endaddr" to
3161 * the end of this map entry...
3162 */
3163 endaddr = entry->vme_end;
3164 }
3165 #ifndef NO_NESTED_PMAP
3166 if (entry->use_pmap) {
3167 vm_map_offset_t start_unnest, end_unnest;
3168
3169 /*
3170 * Make sure the range between the start of this entry and
3171 * the new "endaddr" is no longer nested before we clip.
3172 * Unnest only the minimum range the platform can handle.
3173 * vm_map_clip_unnest may perform additional adjustments to
3174 * the unnest range.
3175 */
3176 start_unnest = entry->vme_start;
3177 end_unnest =
3178 (endaddr + pmap_nesting_size_min - 1) &
3179 ~(pmap_nesting_size_min - 1);
3180 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
3181 }
3182 #endif /* NO_NESTED_PMAP */
3183 if (endaddr < entry->vme_end) {
3184 if (entry->object.vm_object &&
3185 !entry->is_sub_map &&
3186 entry->object.vm_object->phys_contiguous) {
3187 pmap_remove(map->pmap,
3188 (addr64_t)(entry->vme_start),
3189 (addr64_t)(entry->vme_end));
3190 }
3191 _vm_map_clip_end(&map->hdr, entry, endaddr);
3192 UPDATE_FIRST_FREE(map, map->first_free);
3193 }
3194 }
3195
3196
3197 #define vm_map_copy_clip_end(copy, entry, endaddr) \
3198 MACRO_BEGIN \
3199 if ((endaddr) < (entry)->vme_end) \
3200 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
3201 MACRO_END
3202
3203 /*
3204 * This routine is called only when it is known that
3205 * the entry must be split.
3206 */
3207 static void
3208 _vm_map_clip_end(
3209 register struct vm_map_header *map_header,
3210 register vm_map_entry_t entry,
3211 register vm_map_offset_t end)
3212 {
3213 register vm_map_entry_t new_entry;
3214
3215 /*
3216 * Create a new entry and insert it
3217 * AFTER the specified entry
3218 */
3219
3220 new_entry = _vm_map_entry_create(map_header);
3221 vm_map_entry_copy_full(new_entry, entry);
3222
3223 new_entry->vme_start = entry->vme_end = end;
3224 new_entry->offset += (end - entry->vme_start);
3225
3226 _vm_map_entry_link(map_header, entry, new_entry);
3227
3228 if (entry->is_sub_map)
3229 vm_map_reference(new_entry->object.sub_map);
3230 else
3231 vm_object_reference(new_entry->object.vm_object);
3232 }
3233
3234
3235 /*
3236 * VM_MAP_RANGE_CHECK: [ internal use only ]
3237 *
3238 * Asserts that the starting and ending region
3239 * addresses fall within the valid range of the map.
3240 */
3241 #define VM_MAP_RANGE_CHECK(map, start, end) \
3242 MACRO_BEGIN \
3243 if (start < vm_map_min(map)) \
3244 start = vm_map_min(map); \
3245 if (end > vm_map_max(map)) \
3246 end = vm_map_max(map); \
3247 if (start > end) \
3248 start = end; \
3249 MACRO_END
3250
3251 /*
3252 * vm_map_range_check: [ internal use only ]
3253 *
3254 * Check that the region defined by the specified start and
3255 * end addresses is wholly contained within a single map
3256 * entry or set of adjacent map entries of the specified map,
3257 * i.e. the specified region contains no unmapped space.
3258 * If any or all of the region is unmapped, FALSE is returned.
3259 * Otherwise, TRUE is returned and if the output argument 'entry'
3260 * is not NULL it points to the map entry containing the start
3261 * of the region.
3262 *
3263 * The map is locked for reading on entry and is left locked.
3264 */
3265 static boolean_t
3266 vm_map_range_check(
3267 register vm_map_t map,
3268 register vm_map_offset_t start,
3269 register vm_map_offset_t end,
3270 vm_map_entry_t *entry)
3271 {
3272 vm_map_entry_t cur;
3273 register vm_map_offset_t prev;
3274
3275 /*
3276 * Basic sanity checks first
3277 */
3278 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
3279 return (FALSE);
3280
3281 /*
3282 * Check first if the region starts within a valid
3283 * mapping for the map.
3284 */
3285 if (!vm_map_lookup_entry(map, start, &cur))
3286 return (FALSE);
3287
3288 /*
3289 * Optimize for the case that the region is contained
3290 * in a single map entry.
3291 */
3292 if (entry != (vm_map_entry_t *) NULL)
3293 *entry = cur;
3294 if (end <= cur->vme_end)
3295 return (TRUE);
3296
3297 /*
3298 * If the region is not wholly contained within a
3299 * single entry, walk the entries looking for holes.
3300 */
3301 prev = cur->vme_end;
3302 cur = cur->vme_next;
3303 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
3304 if (end <= cur->vme_end)
3305 return (TRUE);
3306 prev = cur->vme_end;
3307 cur = cur->vme_next;
3308 }
3309 return (FALSE);
3310 }
3311
3312 /*
3313 * vm_map_submap: [ kernel use only ]
3314 *
3315 * Mark the given range as handled by a subordinate map.
3316 *
3317 * This range must have been created with vm_map_find using
3318 * the vm_submap_object, and no other operations may have been
3319 * performed on this range prior to calling vm_map_submap.
3320 *
3321 * Only a limited number of operations can be performed
3322 * within this range after calling vm_map_submap:
3323 * vm_fault
3324 * [Don't try vm_map_copyin!]
3325 *
3326 * To remove a submapping, one must first remove the
3327 * range from the superior map, and then destroy the
3328 * submap (if desired). [Better yet, don't try it.]
3329 */
3330 kern_return_t
3331 vm_map_submap(
3332 vm_map_t map,
3333 vm_map_offset_t start,
3334 vm_map_offset_t end,
3335 vm_map_t submap,
3336 vm_map_offset_t offset,
3337 #ifdef NO_NESTED_PMAP
3338 __unused
3339 #endif /* NO_NESTED_PMAP */
3340 boolean_t use_pmap)
3341 {
3342 vm_map_entry_t entry;
3343 register kern_return_t result = KERN_INVALID_ARGUMENT;
3344 register vm_object_t object;
3345
3346 vm_map_lock(map);
3347
3348 if (! vm_map_lookup_entry(map, start, &entry)) {
3349 entry = entry->vme_next;
3350 }
3351
3352 if (entry == vm_map_to_entry(map) ||
3353 entry->is_sub_map) {
3354 vm_map_unlock(map);
3355 return KERN_INVALID_ARGUMENT;
3356 }
3357
3358 assert(!entry->use_pmap); /* we don't want to unnest anything here */
3359 vm_map_clip_start(map, entry, start);
3360 vm_map_clip_end(map, entry, end);
3361
3362 if ((entry->vme_start == start) && (entry->vme_end == end) &&
3363 (!entry->is_sub_map) &&
3364 ((object = entry->object.vm_object) == vm_submap_object) &&
3365 (object->resident_page_count == 0) &&
3366 (object->copy == VM_OBJECT_NULL) &&
3367 (object->shadow == VM_OBJECT_NULL) &&
3368 (!object->pager_created)) {
3369 entry->offset = (vm_object_offset_t)offset;
3370 entry->object.vm_object = VM_OBJECT_NULL;
3371 vm_object_deallocate(object);
3372 entry->is_sub_map = TRUE;
3373 entry->object.sub_map = submap;
3374 vm_map_reference(submap);
3375 submap->mapped = TRUE;
3376
3377 #ifndef NO_NESTED_PMAP
3378 if (use_pmap) {
3379 /* nest if platform code will allow */
3380 if(submap->pmap == NULL) {
3381 submap->pmap = pmap_create((vm_map_size_t) 0, FALSE);
3382 if(submap->pmap == PMAP_NULL) {
3383 vm_map_unlock(map);
3384 return(KERN_NO_SPACE);
3385 }
3386 }
3387 result = pmap_nest(map->pmap,
3388 (entry->object.sub_map)->pmap,
3389 (addr64_t)start,
3390 (addr64_t)start,
3391 (uint64_t)(end - start));
3392 if(result)
3393 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
3394 entry->use_pmap = TRUE;
3395 }
3396 #else /* NO_NESTED_PMAP */
3397 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
3398 #endif /* NO_NESTED_PMAP */
3399 result = KERN_SUCCESS;
3400 }
3401 vm_map_unlock(map);
3402
3403 return(result);
3404 }
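/*
 * A hedged usage sketch, assuming the [start, end) range was previously
 * set up against vm_submap_object as required by the comment above;
 * "parent_map", "submap" and "kr" are placeholders supplied by the
 * caller:
 *
 *	kr = vm_map_submap(parent_map,
 *			   start, end,			// page-aligned range
 *			   submap,
 *			   (vm_map_offset_t) 0,		// offset into submap
 *			   FALSE);			// don't nest pmaps
 *
 * Unless the range matches exactly one entry backed by vm_submap_object,
 * the call returns KERN_INVALID_ARGUMENT.
 */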
3405
3406 /*
3407 * vm_map_protect:
3408 *
3409 * Sets the protection of the specified address
3410 * region in the target map. If "set_max" is
3411 * specified, the maximum protection is to be set;
3412 * otherwise, only the current protection is affected.
3413 */
3414 kern_return_t
3415 vm_map_protect(
3416 register vm_map_t map,
3417 register vm_map_offset_t start,
3418 register vm_map_offset_t end,
3419 register vm_prot_t new_prot,
3420 register boolean_t set_max)
3421 {
3422 register vm_map_entry_t current;
3423 register vm_map_offset_t prev;
3424 vm_map_entry_t entry;
3425 vm_prot_t new_max;
3426
3427 XPR(XPR_VM_MAP,
3428 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
3429 map, start, end, new_prot, set_max);
3430
3431 vm_map_lock(map);
3432
3433 /* LP64todo - remove this check when vm_map_commpage64()
3434 * no longer has to stuff in a map_entry for the commpage
3435 * above the map's max_offset.
3436 */
3437 if (start >= map->max_offset) {
3438 vm_map_unlock(map);
3439 return(KERN_INVALID_ADDRESS);
3440 }
3441
3442 while(1) {
3443 /*
3444 * Lookup the entry. If it doesn't start in a valid
3445 * entry, return an error.
3446 */
3447 if (! vm_map_lookup_entry(map, start, &entry)) {
3448 vm_map_unlock(map);
3449 return(KERN_INVALID_ADDRESS);
3450 }
3451
3452 if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
3453 start = SUPERPAGE_ROUND_DOWN(start);
3454 continue;
3455 }
3456 break;
3457 }
3458 if (entry->superpage_size)
3459 end = SUPERPAGE_ROUND_UP(end);
3460
3461 /*
3462 * Make a first pass to check for protection and address
3463 * violations.
3464 */
3465
3466 current = entry;
3467 prev = current->vme_start;
3468 while ((current != vm_map_to_entry(map)) &&
3469 (current->vme_start < end)) {
3470
3471 /*
3472 * If there is a hole, return an error.
3473 */
3474 if (current->vme_start != prev) {
3475 vm_map_unlock(map);
3476 return(KERN_INVALID_ADDRESS);
3477 }
3478
3479 new_max = current->max_protection;
3480 if(new_prot & VM_PROT_COPY) {
3481 new_max |= VM_PROT_WRITE;
3482 if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
3483 vm_map_unlock(map);
3484 return(KERN_PROTECTION_FAILURE);
3485 }
3486 } else {
3487 if ((new_prot & new_max) != new_prot) {
3488 vm_map_unlock(map);
3489 return(KERN_PROTECTION_FAILURE);
3490 }
3491 }
3492
3493 #if CONFIG_EMBEDDED
3494 if (new_prot & VM_PROT_WRITE) {
3495 if (new_prot & VM_PROT_EXECUTE) {
3496 printf("EMBEDDED: %s can't have both write and exec at the same time\n", __FUNCTION__);
3497 new_prot &= ~VM_PROT_EXECUTE;
3498 }
3499 }
3500 #endif
3501
3502 prev = current->vme_end;
3503 current = current->vme_next;
3504 }
3505 if (end > prev) {
3506 vm_map_unlock(map);
3507 return(KERN_INVALID_ADDRESS);
3508 }
3509
3510 /*
3511 * Go back and fix up protections.
3512 * Clip to start here if the range starts within
3513 * the entry.
3514 */
3515
3516 current = entry;
3517 if (current != vm_map_to_entry(map)) {
3518 /* clip and unnest if necessary */
3519 vm_map_clip_start(map, current, start);
3520 }
3521
3522 while ((current != vm_map_to_entry(map)) &&
3523 (current->vme_start < end)) {
3524
3525 vm_prot_t old_prot;
3526
3527 vm_map_clip_end(map, current, end);
3528
3529 assert(!current->use_pmap); /* clipping did unnest if needed */
3530
3531 old_prot = current->protection;
3532
3533 if(new_prot & VM_PROT_COPY) {
3534 /* caller is asking specifically to copy the */
3535 /* mapped data, this implies that max protection */
3536 /* will include write. Caller must be prepared */
3537 /* for loss of shared memory communication in the */
3538 /* target area after taking this step */
3539 current->needs_copy = TRUE;
3540 current->max_protection |= VM_PROT_WRITE;
3541 }
3542
3543 if (set_max)
3544 current->protection =
3545 (current->max_protection =
3546 new_prot & ~VM_PROT_COPY) &
3547 old_prot;
3548 else
3549 current->protection = new_prot & ~VM_PROT_COPY;
3550
3551 /*
3552 * Update physical map if necessary.
3553 * If the request is to turn off write protection,
3554 * we won't do it for real (in pmap). This is because
3555 * it would cause copy-on-write to fail. We've already
3556 * set the new protection in the map, so if a
3557 * write-protect fault occurred, it will be fixed up
3558 * properly, COW or not.
3559 */
3560 if (current->protection != old_prot) {
3561 /* Look one level in; we support nested pmaps */
3562 /* from mapped submaps which are direct entries */
3563 /* in our map */
3564
3565 vm_prot_t prot;
3566
3567 prot = current->protection & ~VM_PROT_WRITE;
3568
3569 if (override_nx(map, current->alias) && prot)
3570 prot |= VM_PROT_EXECUTE;
3571
3572 if (current->is_sub_map && current->use_pmap) {
3573 pmap_protect(current->object.sub_map->pmap,
3574 current->vme_start,
3575 current->vme_end,
3576 prot);
3577 } else {
3578 pmap_protect(map->pmap,
3579 current->vme_start,
3580 current->vme_end,
3581 prot);
3582 }
3583 }
3584 current = current->vme_next;
3585 }
3586
3587 current = entry;
3588 while ((current != vm_map_to_entry(map)) &&
3589 (current->vme_start <= end)) {
3590 vm_map_simplify_entry(map, current);
3591 current = current->vme_next;
3592 }
3593
3594 vm_map_unlock(map);
3595 return(KERN_SUCCESS);
3596 }
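/*
 * An illustrative sketch, assuming "map", "start" and "end" describe an
 * existing page-aligned mapping: drop the current protection to
 * read-only without touching the maximum protection.
 *
 *	kr = vm_map_protect(map, start, end,
 *			    VM_PROT_READ,	// new current protection
 *			    FALSE);		// set_max
 *
 * Passing set_max == TRUE instead sets max_protection to the new value
 * and re-derives the current protection as its intersection with the
 * old one, as the code above shows.
 */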
3597
3598 /*
3599 * vm_map_inherit:
3600 *
3601 * Sets the inheritance of the specified address
3602 * range in the target map. Inheritance
3603 * affects how the map will be shared with
3604 * child maps at the time of vm_map_fork.
3605 */
3606 kern_return_t
3607 vm_map_inherit(
3608 register vm_map_t map,
3609 register vm_map_offset_t start,
3610 register vm_map_offset_t end,
3611 register vm_inherit_t new_inheritance)
3612 {
3613 register vm_map_entry_t entry;
3614 vm_map_entry_t temp_entry;
3615
3616 vm_map_lock(map);
3617
3618 VM_MAP_RANGE_CHECK(map, start, end);
3619
3620 if (vm_map_lookup_entry(map, start, &temp_entry)) {
3621 entry = temp_entry;
3622 }
3623 else {
3624 temp_entry = temp_entry->vme_next;
3625 entry = temp_entry;
3626 }
3627
3628 /* first check entire range for submaps which can't support the */
3629 /* given inheritance. */
3630 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3631 if(entry->is_sub_map) {
3632 if(new_inheritance == VM_INHERIT_COPY) {
3633 vm_map_unlock(map);
3634 return(KERN_INVALID_ARGUMENT);
3635 }
3636 }
3637
3638 entry = entry->vme_next;
3639 }
3640
3641 entry = temp_entry;
3642 if (entry != vm_map_to_entry(map)) {
3643 /* clip and unnest if necessary */
3644 vm_map_clip_start(map, entry, start);
3645 }
3646
3647 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3648 vm_map_clip_end(map, entry, end);
3649 assert(!entry->use_pmap); /* clip did unnest if needed */
3650
3651 entry->inheritance = new_inheritance;
3652
3653 entry = entry->vme_next;
3654 }
3655
3656 vm_map_unlock(map);
3657 return(KERN_SUCCESS);
3658 }
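/*
 * An illustrative sketch, assuming "map", "start" and "end" describe an
 * existing mapped range: mark it so that a child created by vm_map_fork
 * gets its own copy rather than sharing the parent's memory.
 *
 *	kr = vm_map_inherit(map, start, end, VM_INHERIT_COPY);
 *
 * As the first loop above shows, VM_INHERIT_COPY is rejected with
 * KERN_INVALID_ARGUMENT if any entry in the range is a submap.
 */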
3659
3660 /*
3661 * Update the accounting for the amount of wired memory in this map. If the user has
3662 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
3663 */
3664
3665 static kern_return_t
3666 add_wire_counts(
3667 vm_map_t map,
3668 vm_map_entry_t entry,
3669 boolean_t user_wire)
3670 {
3671 vm_map_size_t size;
3672
3673 if (user_wire) {
3674
3675 /*
3676 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
3677 * this map entry.
3678 */
3679
3680 if (entry->user_wired_count == 0) {
3681 size = entry->vme_end - entry->vme_start;
3682
3683 /*
3684 * Since this is the first time the user is wiring this map entry, check to see if we're
3685 * exceeding the user wire limits. There is a per map limit which is the smaller of either
3686 * the process's rlimit or the global vm_user_wire_limit which caps this value. There is also
3687 * a system-wide limit on the amount of memory all users can wire. If the user is over either
3688 * limit, then we fail.
3689 */
3690
3691 if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
3692 size + ptoa_64(vm_page_wire_count) > vm_global_user_wire_limit ||
3693 size + ptoa_64(vm_page_wire_count) > max_mem - vm_global_no_user_wire_amount)
3694 return KERN_RESOURCE_SHORTAGE;
3695
3696 /*
3697 * The first time the user wires an entry, we also increment the wired_count and add this to
3698 * the total that has been wired in the map.
3699 */
3700
3701 if (entry->wired_count >= MAX_WIRE_COUNT)
3702 return KERN_FAILURE;
3703
3704 entry->wired_count++;
3705 map->user_wire_size += size;
3706 }
3707
3708 if (entry->user_wired_count >= MAX_WIRE_COUNT)
3709 return KERN_FAILURE;
3710
3711 entry->user_wired_count++;
3712
3713 } else {
3714
3715 /*
3716 * The kernel's wiring the memory. Just bump the count and continue.
3717 */
3718
3719 if (entry->wired_count >= MAX_WIRE_COUNT)
3720 panic("vm_map_wire: too many wirings");
3721
3722 entry->wired_count++;
3723 }
3724
3725 return KERN_SUCCESS;
3726 }
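/*
 * Worked example of the per-map check above, with hypothetical numbers:
 * if map->user_wire_limit is 64MB, vm_user_wire_limit is 32MB and
 * map->user_wire_size is already 30MB, then wiring a 4MB entry fails
 * with KERN_RESOURCE_SHORTAGE, since 4MB + 30MB exceeds MIN(64MB, 32MB).
 * The two system-wide checks in the same condition are evaluated the
 * same way.
 */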
3727
3728 /*
3729 * Update the memory wiring accounting now that the given map entry is being unwired.
3730 */
3731
3732 static void
3733 subtract_wire_counts(
3734 vm_map_t map,
3735 vm_map_entry_t entry,
3736 boolean_t user_wire)
3737 {
3738
3739 if (user_wire) {
3740
3741 /*
3742 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
3743 */
3744
3745 if (entry->user_wired_count == 1) {
3746
3747 /*
3748 * We're removing the last user wire reference. Decrement the wired_count and the total
3749 * user wired memory for this map.
3750 */
3751
3752 assert(entry->wired_count >= 1);
3753 entry->wired_count--;
3754 map->user_wire_size -= entry->vme_end - entry->vme_start;
3755 }
3756
3757 assert(entry->user_wired_count >= 1);
3758 entry->user_wired_count--;
3759
3760 } else {
3761
3762 /*
3763 * The kernel is unwiring the memory. Just update the count.
3764 */
3765
3766 assert(entry->wired_count >= 1);
3767 entry->wired_count--;
3768 }
3769 }
3770
3771 /*
3772 * vm_map_wire:
3773 *
3774 * Sets the pageability of the specified address range in the
3775 * target map as wired. Regions specified as not pageable require
3776 * locked-down physical memory and physical page maps. The
3777 * access_type variable indicates types of accesses that must not
3778 * generate page faults. This is checked against protection of
3779 * memory being locked-down.
3780 *
3781 * The map must not be locked, but a reference must remain to the
3782 * map throughout the call.
3783 */
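/*
 * A hedged sketch of the public vm_map_wire() interface described
 * above, assuming "map", "start" and "end" come from the caller:
 * wiring a range for read/write access on behalf of the kernel.
 *
 *	kr = vm_map_wire(map, start, end,
 *			 VM_PROT_READ | VM_PROT_WRITE,	// access_type
 *			 FALSE);			// user_wire
 */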
3784 static kern_return_t
3785 vm_map_wire_nested(
3786 register vm_map_t map,
3787 register vm_map_offset_t start,
3788 register vm_map_offset_t end,
3789 register vm_prot_t access_type,
3790 boolean_t user_wire,
3791 pmap_t map_pmap,
3792 vm_map_offset_t pmap_addr)
3793 {
3794 register vm_map_entry_t entry;
3795 struct vm_map_entry *first_entry, tmp_entry;
3796 vm_map_t real_map;
3797 register vm_map_offset_t s,e;
3798 kern_return_t rc;
3799 boolean_t need_wakeup;
3800 boolean_t main_map = FALSE;
3801 wait_interrupt_t interruptible_state;
3802 thread_t cur_thread;
3803 unsigned int last_timestamp;
3804 vm_map_size_t size;
3805
3806 vm_map_lock(map);
3807 if(map_pmap == NULL)
3808 main_map = TRUE;
3809 last_timestamp = map->timestamp;
3810
3811 VM_MAP_RANGE_CHECK(map, start, end);
3812 assert(page_aligned(start));
3813 assert(page_aligned(end));
3814 if (start == end) {
3815 /* We wired what the caller asked for, zero pages */
3816 vm_map_unlock(map);
3817 return KERN_SUCCESS;
3818 }
3819
3820 need_wakeup = FALSE;
3821 cur_thread = current_thread();
3822
3823 s = start;
3824 rc = KERN_SUCCESS;
3825
3826 if (vm_map_lookup_entry(map, s, &first_entry)) {
3827 entry = first_entry;
3828 /*
3829 * vm_map_clip_start will be done later.
3830 * We don't want to unnest any nested submaps here !
3831 */
3832 } else {
3833 /* Start address is not in map */
3834 rc = KERN_INVALID_ADDRESS;
3835 goto done;
3836 }
3837
3838 while ((entry != vm_map_to_entry(map)) && (s < end)) {
3839 /*
3840 * At this point, we have wired from "start" to "s".
3841 * We still need to wire from "s" to "end".
3842 *
3843 * "entry" hasn't been clipped, so it could start before "s"
3844 * and/or end after "end".
3845 */
3846
3847 /* "e" is how far we want to wire in this entry */
3848 e = entry->vme_end;
3849 if (e > end)
3850 e = end;
3851
3852 /*
3853 * If another thread is wiring/unwiring this entry then
3854 * block after informing the other thread to wake us up.
3855 */
3856 if (entry->in_transition) {
3857 wait_result_t wait_result;
3858
3859 /*
3860 * We have not clipped the entry. Make sure that
3861 * the start address is in range so that the lookup
3862 * below will succeed.
3863 * "s" is the current starting point: we've already
3864 * wired from "start" to "s" and we still have
3865 * to wire from "s" to "end".
3866 */
3867
3868 entry->needs_wakeup = TRUE;
3869
3870 /*
3871 * wake up anybody waiting on entries that we have
3872 * already wired.
3873 */
3874 if (need_wakeup) {
3875 vm_map_entry_wakeup(map);
3876 need_wakeup = FALSE;
3877 }
3878 /*
3879 * User wiring is interruptible
3880 */
3881 wait_result = vm_map_entry_wait(map,
3882 (user_wire) ? THREAD_ABORTSAFE :
3883 THREAD_UNINT);
3884 if (user_wire && wait_result == THREAD_INTERRUPTED) {
3885 /*
3886 * undo the wirings we have done so far
3887 * We do not clear the needs_wakeup flag,
3888 * because we cannot tell if we were the
3889 * only one waiting.
3890 */
3891 rc = KERN_FAILURE;
3892 goto done;
3893 }
3894
3895 /*
3896 * Cannot avoid a lookup here. reset timestamp.
3897 */
3898 last_timestamp = map->timestamp;
3899
3900 /*
3901 * The entry could have been clipped, look it up again.
3902 * Worst that can happen is that it may not exist anymore.
3903 */
3904 if (!vm_map_lookup_entry(map, s, &first_entry)) {
3905 if (!user_wire)
3906 panic("vm_map_wire: re-lookup failed");
3907
3908 /*
3909 * User: undo everything up to the previous
3910 * entry. Let vm_map_unwire worry about
3911 * checking the validity of the range.
3912 */
3913 rc = KERN_FAILURE;
3914 goto done;
3915 }
3916 entry = first_entry;
3917 continue;
3918 }
3919
3920 if (entry->is_sub_map) {
3921 vm_map_offset_t sub_start;
3922 vm_map_offset_t sub_end;
3923 vm_map_offset_t local_start;
3924 vm_map_offset_t local_end;
3925 pmap_t pmap;
3926
3927 vm_map_clip_start(map, entry, s);
3928 vm_map_clip_end(map, entry, end);
3929
3930 sub_start = entry->offset;
3931 sub_end = entry->vme_end;
3932 sub_end += entry->offset - entry->vme_start;
3933
3934 local_end = entry->vme_end;
3935 if(map_pmap == NULL) {
3936 vm_object_t object;
3937 vm_object_offset_t offset;
3938 vm_prot_t prot;
3939 boolean_t wired;
3940 vm_map_entry_t local_entry;
3941 vm_map_version_t version;
3942 vm_map_t lookup_map;
3943
3944 if(entry->use_pmap) {
3945 pmap = entry->object.sub_map->pmap;
3946 /* ppc implementation requires that */
3947 /* submap's pmap address ranges line */
3948 /* up with parent map */
3949 #ifdef notdef
3950 pmap_addr = sub_start;
3951 #endif
3952 pmap_addr = s;
3953 } else {
3954 pmap = map->pmap;
3955 pmap_addr = s;
3956 }
3957
3958 if (entry->wired_count) {
3959 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
3960 goto done;
3961
3962 /*
3963 * The map was not unlocked:
3964 * no need to goto re-lookup.
3965 * Just go directly to next entry.
3966 */
3967 entry = entry->vme_next;
3968 s = entry->vme_start;
3969 continue;
3970
3971 }
3972
3973 /* call vm_map_lookup_locked to */
3974 /* cause any needs copy to be */
3975 /* evaluated */
3976 local_start = entry->vme_start;
3977 lookup_map = map;
3978 vm_map_lock_write_to_read(map);
3979 if(vm_map_lookup_locked(
3980 &lookup_map, local_start,
3981 access_type,
3982 OBJECT_LOCK_EXCLUSIVE,
3983 &version, &object,
3984 &offset, &prot, &wired,
3985 NULL,
3986 &real_map)) {
3987
3988 vm_map_unlock_read(lookup_map);
3989 vm_map_unwire(map, start,
3990 s, user_wire);
3991 return(KERN_FAILURE);
3992 }
3993 if(real_map != lookup_map)
3994 vm_map_unlock(real_map);
3995 vm_map_unlock_read(lookup_map);
3996 vm_map_lock(map);
3997 vm_object_unlock(object);
3998
3999 /* we unlocked, so must re-lookup */
4000 if (!vm_map_lookup_entry(map,
4001 local_start,
4002 &local_entry)) {
4003 rc = KERN_FAILURE;
4004 goto done;
4005 }
4006
4007 /*
4008 * entry could have been "simplified",
4009 * so re-clip
4010 */
4011 entry = local_entry;
4012 assert(s == local_start);
4013 vm_map_clip_start(map, entry, s);
4014 vm_map_clip_end(map, entry, end);
4015 /* re-compute "e" */
4016 e = entry->vme_end;
4017 if (e > end)
4018 e = end;
4019
4020 /* did we have a change of type? */
4021 if (!entry->is_sub_map) {
4022 last_timestamp = map->timestamp;
4023 continue;
4024 }
4025 } else {
4026 local_start = entry->vme_start;
4027 pmap = map_pmap;
4028 }
4029
4030 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4031 goto done;
4032
4033 entry->in_transition = TRUE;
4034
4035 vm_map_unlock(map);
4036 rc = vm_map_wire_nested(entry->object.sub_map,
4037 sub_start, sub_end,
4038 access_type,
4039 user_wire, pmap, pmap_addr);
4040 vm_map_lock(map);
4041
4042 /*
4043 * Find the entry again. It could have been clipped
4044 * after we unlocked the map.
4045 */
4046 if (!vm_map_lookup_entry(map, local_start,
4047 &first_entry))
4048 panic("vm_map_wire: re-lookup failed");
4049 entry = first_entry;
4050
4051 assert(local_start == s);
4052 /* re-compute "e" */
4053 e = entry->vme_end;
4054 if (e > end)
4055 e = end;
4056
4057 last_timestamp = map->timestamp;
4058 while ((entry != vm_map_to_entry(map)) &&
4059 (entry->vme_start < e)) {
4060 assert(entry->in_transition);
4061 entry->in_transition = FALSE;
4062 if (entry->needs_wakeup) {
4063 entry->needs_wakeup = FALSE;
4064 need_wakeup = TRUE;
4065 }
4066 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
4067 subtract_wire_counts(map, entry, user_wire);
4068 }
4069 entry = entry->vme_next;
4070 }
4071 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
4072 goto done;
4073 }
4074
4075 /* no need to relookup again */
4076 s = entry->vme_start;
4077 continue;
4078 }
4079
4080 /*
4081 * If this entry is already wired then increment
4082 * the appropriate wire reference count.
4083 */
4084 if (entry->wired_count) {
4085 /*
4086 * entry is already wired down, get our reference
4087 * after clipping to our range.
4088 */
4089 vm_map_clip_start(map, entry, s);
4090 vm_map_clip_end(map, entry, end);
4091
4092 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4093 goto done;
4094
4095 /* map was not unlocked: no need to relookup */
4096 entry = entry->vme_next;
4097 s = entry->vme_start;
4098 continue;
4099 }
4100
4101 /*
4102 * Unwired entry or wire request transmitted via submap
4103 */
4104
4105
4106 /*
4107 * Perform actions of vm_map_lookup that need the write
4108 * lock on the map: create a shadow object for a
4109 * copy-on-write region, or an object for a zero-fill
4110 * region.
4111 */
4112 size = entry->vme_end - entry->vme_start;
4113 /*
4114 * If wiring a copy-on-write page, we need to copy it now
4115 * even if we're only (currently) requesting read access.
4116 * This is aggressive, but once it's wired we can't move it.
4117 */
4118 if (entry->needs_copy) {
4119 vm_object_shadow(&entry->object.vm_object,
4120 &entry->offset, size);
4121 entry->needs_copy = FALSE;
4122 } else if (entry->object.vm_object == VM_OBJECT_NULL) {
4123 entry->object.vm_object = vm_object_allocate(size);
4124 entry->offset = (vm_object_offset_t)0;
4125 }
4126
4127 vm_map_clip_start(map, entry, s);
4128 vm_map_clip_end(map, entry, end);
4129
4130 /* re-compute "e" */
4131 e = entry->vme_end;
4132 if (e > end)
4133 e = end;
4134
4135 /*
4136 * Check for holes and protection mismatch.
4137 * Holes: Next entry should be contiguous unless this
4138 * is the end of the region.
4139 * Protection: Access requested must be allowed, unless
4140 * wiring is by protection class
4141 */
4142 if ((entry->vme_end < end) &&
4143 ((entry->vme_next == vm_map_to_entry(map)) ||
4144 (entry->vme_next->vme_start > entry->vme_end))) {
4145 /* found a hole */
4146 rc = KERN_INVALID_ADDRESS;
4147 goto done;
4148 }
4149 if ((entry->protection & access_type) != access_type) {
4150 /* found a protection problem */
4151 rc = KERN_PROTECTION_FAILURE;
4152 goto done;
4153 }
4154
4155 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
4156
4157 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4158 goto done;
4159
4160 entry->in_transition = TRUE;
4161
4162 /*
4163 * This entry might get split once we unlock the map.
4164 * In vm_fault_wire(), we need the current range as
4165 * defined by this entry. In order for this to work
4166 * along with a simultaneous clip operation, we make a
4167 * temporary copy of this entry and use that for the
4168 * wiring. Note that the underlying objects do not
4169 * change during a clip.
4170 */
4171 tmp_entry = *entry;
4172
4173 /*
4174 * The in_transition state guarantees that the entry
4175 * (or entries for this range, if a split occurred) will be
4176 * there when the map lock is acquired for the second time.
4177 */
4178 vm_map_unlock(map);
4179
4180 if (!user_wire && cur_thread != THREAD_NULL)
4181 interruptible_state = thread_interrupt_level(THREAD_UNINT);
4182 else
4183 interruptible_state = THREAD_UNINT;
4184
4185 if(map_pmap)
4186 rc = vm_fault_wire(map,
4187 &tmp_entry, map_pmap, pmap_addr);
4188 else
4189 rc = vm_fault_wire(map,
4190 &tmp_entry, map->pmap,
4191 tmp_entry.vme_start);
4192
4193 if (!user_wire && cur_thread != THREAD_NULL)
4194 thread_interrupt_level(interruptible_state);
4195
4196 vm_map_lock(map);
4197
4198 if (last_timestamp+1 != map->timestamp) {
4199 /*
4200 * Find the entry again. It could have been clipped
4201 * after we unlocked the map.
4202 */
4203 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4204 &first_entry))
4205 panic("vm_map_wire: re-lookup failed");
4206
4207 entry = first_entry;
4208 }
4209
4210 last_timestamp = map->timestamp;
4211
4212 while ((entry != vm_map_to_entry(map)) &&
4213 (entry->vme_start < tmp_entry.vme_end)) {
4214 assert(entry->in_transition);
4215 entry->in_transition = FALSE;
4216 if (entry->needs_wakeup) {
4217 entry->needs_wakeup = FALSE;
4218 need_wakeup = TRUE;
4219 }
4220 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
4221 subtract_wire_counts(map, entry, user_wire);
4222 }
4223 entry = entry->vme_next;
4224 }
4225
4226 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
4227 goto done;
4228 }
4229
4230 s = entry->vme_start;
4231 } /* end while loop through map entries */
4232
4233 done:
4234 if (rc == KERN_SUCCESS) {
4235 /* repair any damage we may have made to the VM map */
4236 vm_map_simplify_range(map, start, end);
4237 }
4238
4239 vm_map_unlock(map);
4240
4241 /*
4242 * wake up anybody waiting on entries we wired.
4243 */
4244 if (need_wakeup)
4245 vm_map_entry_wakeup(map);
4246
4247 if (rc != KERN_SUCCESS) {
4248 /* undo what has been wired so far */
4249 vm_map_unwire(map, start, s, user_wire);
4250 }
4251
4252 return rc;
4253
4254 }
4255
4256 kern_return_t
4257 vm_map_wire(
4258 register vm_map_t map,
4259 register vm_map_offset_t start,
4260 register vm_map_offset_t end,
4261 register vm_prot_t access_type,
4262 boolean_t user_wire)
4263 {
4264
4265 kern_return_t kret;
4266
4267 #ifdef ppc
4268 /*
4269 * the calls to mapping_prealloc and mapping_relpre
4270 * (along with the VM_MAP_RANGE_CHECK to ensure a
4271 * reasonable range was passed in) are
4272 * currently necessary because
4273 * we haven't enabled kernel pre-emption
4274 * and/or the pmap_enter cannot purge and re-use
4275 * existing mappings
4276 */
4277 VM_MAP_RANGE_CHECK(map, start, end);
4278 assert((unsigned int) (end - start) == (end - start));
4279 mapping_prealloc((unsigned int) (end - start));
4280 #endif
4281 kret = vm_map_wire_nested(map, start, end, access_type,
4282 user_wire, (pmap_t)NULL, 0);
4283 #ifdef ppc
4284 mapping_relpre();
4285 #endif
4286 return kret;
4287 }
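/*
 * Illustrative sketch only; the address variables below are assumptions
 * for the example and not part of this file.  A typical caller wires a
 * page-aligned range around an operation and unwires it afterwards with
 * the same user_wire setting:
 *
 *	kern_return_t	kr;
 *	vm_map_offset_t	wire_start = vm_map_trunc_page(addr);
 *	vm_map_offset_t	wire_end   = vm_map_round_page(addr + len);
 *
 *	kr = vm_map_wire(map, wire_start, wire_end,
 *			 VM_PROT_READ | VM_PROT_WRITE,
 *			 FALSE);	(kernel wiring, not a user mlock)
 *	if (kr == KERN_SUCCESS) {
 *		... operate on the wired pages ...
 *		(void) vm_map_unwire(map, wire_start, wire_end, FALSE);
 *	}
 */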
4288
4289 /*
4290 * vm_map_unwire:
4291 *
4292 * Sets the pageability of the specified address range in the target
4293 * map as pageable. Regions specified must have been wired previously.
4294 *
4295 * The map must not be locked, but a reference must remain to the map
4296 * throughout the call.
4297 *
4298 * The kernel will panic on failures. User unwire ignores holes and
4299 * unwired and in-transition entries to avoid losing memory by leaving
4300 * it wired.
4301 */
4302 static kern_return_t
4303 vm_map_unwire_nested(
4304 register vm_map_t map,
4305 register vm_map_offset_t start,
4306 register vm_map_offset_t end,
4307 boolean_t user_wire,
4308 pmap_t map_pmap,
4309 vm_map_offset_t pmap_addr)
4310 {
4311 register vm_map_entry_t entry;
4312 struct vm_map_entry *first_entry, tmp_entry;
4313 boolean_t need_wakeup;
4314 boolean_t main_map = FALSE;
4315 unsigned int last_timestamp;
4316
4317 vm_map_lock(map);
4318 if(map_pmap == NULL)
4319 main_map = TRUE;
4320 last_timestamp = map->timestamp;
4321
4322 VM_MAP_RANGE_CHECK(map, start, end);
4323 assert(page_aligned(start));
4324 assert(page_aligned(end));
4325
4326 if (start == end) {
4327 /* We unwired what the caller asked for: zero pages */
4328 vm_map_unlock(map);
4329 return KERN_SUCCESS;
4330 }
4331
4332 if (vm_map_lookup_entry(map, start, &first_entry)) {
4333 entry = first_entry;
4334 /*
4335 * vm_map_clip_start will be done later.
4336 * We don't want to unnest any nested sub maps here !
4337 */
4338 }
4339 else {
4340 if (!user_wire) {
4341 panic("vm_map_unwire: start not found");
4342 }
4343 /* Start address is not in map. */
4344 vm_map_unlock(map);
4345 return(KERN_INVALID_ADDRESS);
4346 }
4347
4348 if (entry->superpage_size) {
4349 /* superpages are always wired */
4350 vm_map_unlock(map);
4351 return KERN_INVALID_ADDRESS;
4352 }
4353
4354 need_wakeup = FALSE;
4355 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4356 if (entry->in_transition) {
4357 /*
4358 * 1)
4359 * Another thread is wiring down this entry. Note
4360 * that if it were not for the other thread, we would
4361 * be unwiring an unwired entry. This is not
4362 * permitted. If we wait, we will be unwiring memory
4363 * we did not wire.
4364 *
4365 * 2)
4366 * Another thread is unwiring this entry. We did not
4367 * have a reference to it, because if we did, this
4368 * entry will not be getting unwired now.
4369 */
4370 if (!user_wire) {
4371 /*
4372 * XXX FBDP
4373 * This could happen: there could be some
4374 * overlapping vslock/vsunlock operations
4375 * going on.
4376 * We should probably just wait and retry,
4377 * but then we have to be careful that this
4378 * entry could get "simplified" after
4379 * "in_transition" gets unset and before
4380 * we re-lookup the entry, so we would
4381 * have to re-clip the entry to avoid
4382 * re-unwiring what we have already unwired...
4383 * See vm_map_wire_nested().
4384 *
4385 * Or we could just ignore "in_transition"
4386 * here and proceed to decrement the wired
4387 * count(s) on this entry. That should be fine
4388 * as long as "wired_count" doesn't drop all
4389 * the way to 0 (and we should panic if THAT
4390 * happens).
4391 */
4392 panic("vm_map_unwire: in_transition entry");
4393 }
4394
4395 entry = entry->vme_next;
4396 continue;
4397 }
4398
4399 if (entry->is_sub_map) {
4400 vm_map_offset_t sub_start;
4401 vm_map_offset_t sub_end;
4402 vm_map_offset_t local_end;
4403 pmap_t pmap;
4404
4405 vm_map_clip_start(map, entry, start);
4406 vm_map_clip_end(map, entry, end);
4407
4408 sub_start = entry->offset;
4409 sub_end = entry->vme_end - entry->vme_start;
4410 sub_end += entry->offset;
4411 local_end = entry->vme_end;
4412 if(map_pmap == NULL) {
4413 if(entry->use_pmap) {
4414 pmap = entry->object.sub_map->pmap;
4415 pmap_addr = sub_start;
4416 } else {
4417 pmap = map->pmap;
4418 pmap_addr = start;
4419 }
4420 if (entry->wired_count == 0 ||
4421 (user_wire && entry->user_wired_count == 0)) {
4422 if (!user_wire)
4423 panic("vm_map_unwire: entry is unwired");
4424 entry = entry->vme_next;
4425 continue;
4426 }
4427
4428 /*
4429 * Check for holes
4430 * Holes: Next entry should be contiguous unless
4431 * this is the end of the region.
4432 */
4433 if (((entry->vme_end < end) &&
4434 ((entry->vme_next == vm_map_to_entry(map)) ||
4435 (entry->vme_next->vme_start
4436 > entry->vme_end)))) {
4437 if (!user_wire)
4438 panic("vm_map_unwire: non-contiguous region");
4439 /*
4440 entry = entry->vme_next;
4441 continue;
4442 */
4443 }
4444
4445 subtract_wire_counts(map, entry, user_wire);
4446
4447 if (entry->wired_count != 0) {
4448 entry = entry->vme_next;
4449 continue;
4450 }
4451
4452 entry->in_transition = TRUE;
4453 tmp_entry = *entry;/* see comment in vm_map_wire() */
4454
4455 /*
4456 * We can unlock the map now. The in_transition state
4457 * guarantees the existence of the entry.
4458 */
4459 vm_map_unlock(map);
4460 vm_map_unwire_nested(entry->object.sub_map,
4461 sub_start, sub_end, user_wire, pmap, pmap_addr);
4462 vm_map_lock(map);
4463
4464 if (last_timestamp+1 != map->timestamp) {
4465 /*
4466 * Find the entry again. It could have been
4467 * clipped or deleted after we unlocked the map.
4468 */
4469 if (!vm_map_lookup_entry(map,
4470 tmp_entry.vme_start,
4471 &first_entry)) {
4472 if (!user_wire)
4473 panic("vm_map_unwire: re-lookup failed");
4474 entry = first_entry->vme_next;
4475 } else
4476 entry = first_entry;
4477 }
4478 last_timestamp = map->timestamp;
4479
4480 /*
4481 * clear transition bit for all constituent entries
4482 * that were in the original entry (saved in
4483 * tmp_entry). Also check for waiters.
4484 */
4485 while ((entry != vm_map_to_entry(map)) &&
4486 (entry->vme_start < tmp_entry.vme_end)) {
4487 assert(entry->in_transition);
4488 entry->in_transition = FALSE;
4489 if (entry->needs_wakeup) {
4490 entry->needs_wakeup = FALSE;
4491 need_wakeup = TRUE;
4492 }
4493 entry = entry->vme_next;
4494 }
4495 continue;
4496 } else {
4497 vm_map_unlock(map);
4498 vm_map_unwire_nested(entry->object.sub_map,
4499 sub_start, sub_end, user_wire, map_pmap,
4500 pmap_addr);
4501 vm_map_lock(map);
4502
4503 if (last_timestamp+1 != map->timestamp) {
4504 /*
4505 * Find the entry again. It could have been
4506 * clipped or deleted after we unlocked the map.
4507 */
4508 if (!vm_map_lookup_entry(map,
4509 tmp_entry.vme_start,
4510 &first_entry)) {
4511 if (!user_wire)
4512 panic("vm_map_unwire: re-lookup failed");
4513 entry = first_entry->vme_next;
4514 } else
4515 entry = first_entry;
4516 }
4517 last_timestamp = map->timestamp;
4518 }
4519 }
4520
4521
4522 if ((entry->wired_count == 0) ||
4523 (user_wire && entry->user_wired_count == 0)) {
4524 if (!user_wire)
4525 panic("vm_map_unwire: entry is unwired");
4526
4527 entry = entry->vme_next;
4528 continue;
4529 }
4530
4531 assert(entry->wired_count > 0 &&
4532 (!user_wire || entry->user_wired_count > 0));
4533
4534 vm_map_clip_start(map, entry, start);
4535 vm_map_clip_end(map, entry, end);
4536
4537 /*
4538 * Check for holes
4539 * Holes: Next entry should be contiguous unless
4540 * this is the end of the region.
4541 */
4542 if (((entry->vme_end < end) &&
4543 ((entry->vme_next == vm_map_to_entry(map)) ||
4544 (entry->vme_next->vme_start > entry->vme_end)))) {
4545
4546 if (!user_wire)
4547 panic("vm_map_unwire: non-contiguous region");
4548 entry = entry->vme_next;
4549 continue;
4550 }
4551
4552 subtract_wire_counts(map, entry, user_wire);
4553
4554 if (entry->wired_count != 0) {
4555 entry = entry->vme_next;
4556 continue;
4557 }
4558
4559 if(entry->zero_wired_pages) {
4560 entry->zero_wired_pages = FALSE;
4561 }
4562
4563 entry->in_transition = TRUE;
4564 tmp_entry = *entry; /* see comment in vm_map_wire() */
4565
4566 /*
4567 * We can unlock the map now. The in_transition state
4568 * guarantees the existence of the entry.
4569 */
4570 vm_map_unlock(map);
4571 if(map_pmap) {
4572 vm_fault_unwire(map,
4573 &tmp_entry, FALSE, map_pmap, pmap_addr);
4574 } else {
4575 vm_fault_unwire(map,
4576 &tmp_entry, FALSE, map->pmap,
4577 tmp_entry.vme_start);
4578 }
4579 vm_map_lock(map);
4580
4581 if (last_timestamp+1 != map->timestamp) {
4582 /*
4583 * Find the entry again. It could have been clipped
4584 * or deleted after we unlocked the map.
4585 */
4586 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4587 &first_entry)) {
4588 if (!user_wire)
4589 panic("vm_map_unwire: re-lookup failed");
4590 entry = first_entry->vme_next;
4591 } else
4592 entry = first_entry;
4593 }
4594 last_timestamp = map->timestamp;
4595
4596 /*
4597 * clear transition bit for all constituent entries that
4598 * were in the original entry (saved in tmp_entry). Also
4599 * check for waiters.
4600 */
4601 while ((entry != vm_map_to_entry(map)) &&
4602 (entry->vme_start < tmp_entry.vme_end)) {
4603 assert(entry->in_transition);
4604 entry->in_transition = FALSE;
4605 if (entry->needs_wakeup) {
4606 entry->needs_wakeup = FALSE;
4607 need_wakeup = TRUE;
4608 }
4609 entry = entry->vme_next;
4610 }
4611 }
4612
4613 /*
4614 * We might have fragmented the address space when we wired this
4615 * range of addresses. Attempt to re-coalesce these VM map entries
4616 * with their neighbors now that they're no longer wired.
4617 * Under some circumstances, address space fragmentation can
4618 * prevent VM object shadow chain collapsing, which can cause
4619 * swap space leaks.
4620 */
4621 vm_map_simplify_range(map, start, end);
4622
4623 vm_map_unlock(map);
4624 /*
4625 * wake up anybody waiting on entries that we have unwired.
4626 */
4627 if (need_wakeup)
4628 vm_map_entry_wakeup(map);
4629 return(KERN_SUCCESS);
4630
4631 }
4632
4633 kern_return_t
4634 vm_map_unwire(
4635 register vm_map_t map,
4636 register vm_map_offset_t start,
4637 register vm_map_offset_t end,
4638 boolean_t user_wire)
4639 {
4640 return vm_map_unwire_nested(map, start, end,
4641 user_wire, (pmap_t)NULL, 0);
4642 }
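/*
 * Hedged sketch of the user-wire variant; the use of current_map() and
 * the start/end values are assumptions for illustration.  Wirings made
 * on behalf of a user request pass user_wire == TRUE, so the matching
 * unwire tolerates holes and already-unwired entries instead of
 * panicking, per the comment above vm_map_unwire_nested():
 *
 *	kr = vm_map_wire(current_map(), start, end, VM_PROT_READ, TRUE);
 *	...
 *	kr = vm_map_unwire(current_map(), start, end, TRUE);
 */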
4643
4644
4645 /*
4646 * vm_map_entry_delete: [ internal use only ]
4647 *
4648 * Deallocate the given entry from the target map.
4649 */
4650 static void
4651 vm_map_entry_delete(
4652 register vm_map_t map,
4653 register vm_map_entry_t entry)
4654 {
4655 register vm_map_offset_t s, e;
4656 register vm_object_t object;
4657 register vm_map_t submap;
4658
4659 s = entry->vme_start;
4660 e = entry->vme_end;
4661 assert(page_aligned(s));
4662 assert(page_aligned(e));
4663 assert(entry->wired_count == 0);
4664 assert(entry->user_wired_count == 0);
4665 assert(!entry->permanent);
4666
4667 if (entry->is_sub_map) {
4668 object = NULL;
4669 submap = entry->object.sub_map;
4670 } else {
4671 submap = NULL;
4672 object = entry->object.vm_object;
4673 }
4674
4675 vm_map_entry_unlink(map, entry);
4676 map->size -= e - s;
4677
4678 vm_map_entry_dispose(map, entry);
4679
4680 vm_map_unlock(map);
4681 /*
4682 * Deallocate the object only after removing all
4683 * pmap entries pointing to its pages.
4684 */
4685 if (submap)
4686 vm_map_deallocate(submap);
4687 else
4688 vm_object_deallocate(object);
4689
4690 }
4691
4692 void
4693 vm_map_submap_pmap_clean(
4694 vm_map_t map,
4695 vm_map_offset_t start,
4696 vm_map_offset_t end,
4697 vm_map_t sub_map,
4698 vm_map_offset_t offset)
4699 {
4700 vm_map_offset_t submap_start;
4701 vm_map_offset_t submap_end;
4702 vm_map_size_t remove_size;
4703 vm_map_entry_t entry;
4704
4705 submap_end = offset + (end - start);
4706 submap_start = offset;
4707 if(vm_map_lookup_entry(sub_map, offset, &entry)) {
4708
4709 remove_size = (entry->vme_end - entry->vme_start);
4710 if(offset > entry->vme_start)
4711 remove_size -= offset - entry->vme_start;
4712
4713
4714 if(submap_end < entry->vme_end) {
4715 remove_size -=
4716 entry->vme_end - submap_end;
4717 }
4718 if(entry->is_sub_map) {
4719 vm_map_submap_pmap_clean(
4720 sub_map,
4721 start,
4722 start + remove_size,
4723 entry->object.sub_map,
4724 entry->offset);
4725 } else {
4726
4727 if((map->mapped) && (map->ref_count)
4728 && (entry->object.vm_object != NULL)) {
4729 vm_object_pmap_protect(
4730 entry->object.vm_object,
4731 entry->offset,
4732 remove_size,
4733 PMAP_NULL,
4734 entry->vme_start,
4735 VM_PROT_NONE);
4736 } else {
4737 pmap_remove(map->pmap,
4738 (addr64_t)start,
4739 (addr64_t)(start + remove_size));
4740 }
4741 }
4742 }
4743
4744 entry = entry->vme_next;
4745
4746 while((entry != vm_map_to_entry(sub_map))
4747 && (entry->vme_start < submap_end)) {
4748 remove_size = (entry->vme_end - entry->vme_start);
4749 if(submap_end < entry->vme_end) {
4750 remove_size -= entry->vme_end - submap_end;
4751 }
4752 if(entry->is_sub_map) {
4753 vm_map_submap_pmap_clean(
4754 sub_map,
4755 (start + entry->vme_start) - offset,
4756 ((start + entry->vme_start) - offset) + remove_size,
4757 entry->object.sub_map,
4758 entry->offset);
4759 } else {
4760 if((map->mapped) && (map->ref_count)
4761 && (entry->object.vm_object != NULL)) {
4762 vm_object_pmap_protect(
4763 entry->object.vm_object,
4764 entry->offset,
4765 remove_size,
4766 PMAP_NULL,
4767 entry->vme_start,
4768 VM_PROT_NONE);
4769 } else {
4770 pmap_remove(map->pmap,
4771 (addr64_t)((start + entry->vme_start)
4772 - offset),
4773 (addr64_t)(((start + entry->vme_start)
4774 - offset) + remove_size));
4775 }
4776 }
4777 entry = entry->vme_next;
4778 }
4779 return;
4780 }
4781
4782 /*
4783 * vm_map_delete: [ internal use only ]
4784 *
4785 * Deallocates the given address range from the target map.
4786 * Removes all user wirings. Unwires one kernel wiring if
4787 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
4788 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
4789 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
4790 *
4791 * This routine is called with map locked and leaves map locked.
4792 */
4793 static kern_return_t
4794 vm_map_delete(
4795 vm_map_t map,
4796 vm_map_offset_t start,
4797 vm_map_offset_t end,
4798 int flags,
4799 vm_map_t zap_map)
4800 {
4801 vm_map_entry_t entry, next;
4802 struct vm_map_entry *first_entry, tmp_entry;
4803 register vm_map_offset_t s;
4804 register vm_object_t object;
4805 boolean_t need_wakeup;
4806 unsigned int last_timestamp = ~0; /* unlikely value */
4807 int interruptible;
4808
4809 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
4810 THREAD_ABORTSAFE : THREAD_UNINT;
4811
4812 /*
4813 * All our DMA I/O operations in IOKit are currently done by
4814 * wiring through the map entries of the task requesting the I/O.
4815 * Because of this, we must always wait for kernel wirings
4816 * to go away on the entries before deleting them.
4817 *
4818 * Any caller who wants to actually remove a kernel wiring
4819 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
4820 * properly remove one wiring instead of blasting through
4821 * them all.
4822 */
4823 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
4824
4825 while(1) {
4826 /*
4827 * Find the start of the region, and clip it
4828 */
4829 if (vm_map_lookup_entry(map, start, &first_entry)) {
4830 entry = first_entry;
4831 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */
4832 start = SUPERPAGE_ROUND_DOWN(start);
4833 continue;
4834 }
4835 if (start == entry->vme_start) {
4836 /*
4837 * No need to clip. We don't want to cause
4838 * any unnecessary unnesting in this case...
4839 */
4840 } else {
4841 vm_map_clip_start(map, entry, start);
4842 }
4843
4844 /*
4845 * Fix the lookup hint now, rather than each
4846 * time through the loop.
4847 */
4848 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4849 } else {
4850 entry = first_entry->vme_next;
4851 }
4852 break;
4853 }
4854 if (entry->superpage_size)
4855 end = SUPERPAGE_ROUND_UP(end);
4856
4857 need_wakeup = FALSE;
4858 /*
4859 * Step through all entries in this region
4860 */
4861 s = entry->vme_start;
4862 while ((entry != vm_map_to_entry(map)) && (s < end)) {
4863 /*
4864 * At this point, we have deleted all the memory entries
4865 * between "start" and "s". We still need to delete
4866 * all memory entries between "s" and "end".
4867 * While we were blocked and the map was unlocked, some
4868 * new memory entries could have been re-allocated between
4869 * "start" and "s" and we don't want to mess with those.
4870 * Some of those entries could even have been re-assembled
4871 * with an entry after "s" (in vm_map_simplify_entry()), so
4872 * we may have to vm_map_clip_start() again.
4873 */
4874
4875 if (entry->vme_start >= s) {
4876 /*
4877 * This entry starts on or after "s"
4878 * so no need to clip its start.
4879 */
4880 } else {
4881 /*
4882 * This entry has been re-assembled by a
4883 * vm_map_simplify_entry(). We need to
4884 * re-clip its start.
4885 */
4886 vm_map_clip_start(map, entry, s);
4887 }
4888 if (entry->vme_end <= end) {
4889 /*
4890 * This entry is going away completely, so no need
4891 * to clip and possibly cause an unnecessary unnesting.
4892 */
4893 } else {
4894 vm_map_clip_end(map, entry, end);
4895 }
4896
4897 if (entry->permanent) {
4898 panic("attempt to remove permanent VM map entry "
4899 "%p [0x%llx:0x%llx]\n",
4900 entry, (uint64_t) s, (uint64_t) end);
4901 }
4902
4903
4904 if (entry->in_transition) {
4905 wait_result_t wait_result;
4906
4907 /*
4908 * Another thread is wiring/unwiring this entry.
4909 * Let the other thread know we are waiting.
4910 */
4911 assert(s == entry->vme_start);
4912 entry->needs_wakeup = TRUE;
4913
4914 /*
4915 * wake up anybody waiting on entries that we have
4916 * already unwired/deleted.
4917 */
4918 if (need_wakeup) {
4919 vm_map_entry_wakeup(map);
4920 need_wakeup = FALSE;
4921 }
4922
4923 wait_result = vm_map_entry_wait(map, interruptible);
4924
4925 if (interruptible &&
4926 wait_result == THREAD_INTERRUPTED) {
4927 /*
4928 * We do not clear the needs_wakeup flag,
4929 * since we cannot tell if we were the only one.
4930 */
4931 vm_map_unlock(map);
4932 return KERN_ABORTED;
4933 }
4934
4935 /*
4936 * The entry could have been clipped or it
4937 * may not exist anymore. Look it up again.
4938 */
4939 if (!vm_map_lookup_entry(map, s, &first_entry)) {
4940 assert((map != kernel_map) &&
4941 (!entry->is_sub_map));
4942 /*
4943 * User: use the next entry
4944 */
4945 entry = first_entry->vme_next;
4946 s = entry->vme_start;
4947 } else {
4948 entry = first_entry;
4949 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4950 }
4951 last_timestamp = map->timestamp;
4952 continue;
4953 } /* end in_transition */
4954
4955 if (entry->wired_count) {
4956 boolean_t user_wire;
4957
4958 user_wire = entry->user_wired_count > 0;
4959
4960 /*
4961 * Remove a kernel wiring if requested
4962 */
4963 if (flags & VM_MAP_REMOVE_KUNWIRE) {
4964 entry->wired_count--;
4965 }
4966
4967 /*
4968 * Remove all user wirings for proper accounting
4969 */
4970 if (entry->user_wired_count > 0) {
4971 while (entry->user_wired_count)
4972 subtract_wire_counts(map, entry, user_wire);
4973 }
4974
4975 if (entry->wired_count != 0) {
4976 assert(map != kernel_map);
4977 /*
4978 * Cannot continue. Typical case is when
4979 * a user thread has physical I/O pending
4980 * on this page. Either wait for the
4981 * kernel wiring to go away or return an
4982 * error.
4983 */
4984 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
4985 wait_result_t wait_result;
4986
4987 assert(s == entry->vme_start);
4988 entry->needs_wakeup = TRUE;
4989 wait_result = vm_map_entry_wait(map,
4990 interruptible);
4991
4992 if (interruptible &&
4993 wait_result == THREAD_INTERRUPTED) {
4994 /*
4995 * We do not clear the
4996 * needs_wakeup flag, since we
4997 * cannot tell if we were the
4998 * only one.
4999 */
5000 vm_map_unlock(map);
5001 return KERN_ABORTED;
5002 }
5003
5004 /*
5005 * The entry could have been clipped or
5006 * it may not exist anymore. Look it
5007 * up again.
5008 */
5009 if (!vm_map_lookup_entry(map, s,
5010 &first_entry)) {
5011 assert(map != kernel_map);
5012 /*
5013 * User: use the next entry
5014 */
5015 entry = first_entry->vme_next;
5016 s = entry->vme_start;
5017 } else {
5018 entry = first_entry;
5019 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5020 }
5021 last_timestamp = map->timestamp;
5022 continue;
5023 }
5024 else {
5025 return KERN_FAILURE;
5026 }
5027 }
5028
5029 entry->in_transition = TRUE;
5030 /*
5031 * copy current entry. see comment in vm_map_wire()
5032 */
5033 tmp_entry = *entry;
5034 assert(s == entry->vme_start);
5035
5036 /*
5037 * We can unlock the map now. The in_transition
5038 * state guarantees the existence of the entry.
5039 */
5040 vm_map_unlock(map);
5041
5042 if (tmp_entry.is_sub_map) {
5043 vm_map_t sub_map;
5044 vm_map_offset_t sub_start, sub_end;
5045 pmap_t pmap;
5046 vm_map_offset_t pmap_addr;
5047
5048
5049 sub_map = tmp_entry.object.sub_map;
5050 sub_start = tmp_entry.offset;
5051 sub_end = sub_start + (tmp_entry.vme_end -
5052 tmp_entry.vme_start);
5053 if (tmp_entry.use_pmap) {
5054 pmap = sub_map->pmap;
5055 pmap_addr = tmp_entry.vme_start;
5056 } else {
5057 pmap = map->pmap;
5058 pmap_addr = tmp_entry.vme_start;
5059 }
5060 (void) vm_map_unwire_nested(sub_map,
5061 sub_start, sub_end,
5062 user_wire,
5063 pmap, pmap_addr);
5064 } else {
5065
5066 vm_fault_unwire(map, &tmp_entry,
5067 tmp_entry.object.vm_object == kernel_object,
5068 map->pmap, tmp_entry.vme_start);
5069 }
5070
5071 vm_map_lock(map);
5072
5073 if (last_timestamp+1 != map->timestamp) {
5074 /*
5075 * Find the entry again. It could have
5076 * been clipped after we unlocked the map.
5077 */
5078 if (!vm_map_lookup_entry(map, s, &first_entry)){
5079 assert((map != kernel_map) &&
5080 (!entry->is_sub_map));
5081 first_entry = first_entry->vme_next;
5082 s = first_entry->vme_start;
5083 } else {
5084 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5085 }
5086 } else {
5087 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5088 first_entry = entry;
5089 }
5090
5091 last_timestamp = map->timestamp;
5092
5093 entry = first_entry;
5094 while ((entry != vm_map_to_entry(map)) &&
5095 (entry->vme_start < tmp_entry.vme_end)) {
5096 assert(entry->in_transition);
5097 entry->in_transition = FALSE;
5098 if (entry->needs_wakeup) {
5099 entry->needs_wakeup = FALSE;
5100 need_wakeup = TRUE;
5101 }
5102 entry = entry->vme_next;
5103 }
5104 /*
5105 * We have unwired the entry(s). Go back and
5106 * delete them.
5107 */
5108 entry = first_entry;
5109 continue;
5110 }
5111
5112 /* entry is unwired */
5113 assert(entry->wired_count == 0);
5114 assert(entry->user_wired_count == 0);
5115
5116 assert(s == entry->vme_start);
5117
5118 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
5119 /*
5120 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
5121 * vm_map_delete(), some map entries might have been
5122 * transferred to a "zap_map", which doesn't have a
5123 * pmap. The original pmap has already been flushed
5124 * in the vm_map_delete() call targeting the original
5125 * map, but when we get to destroying the "zap_map",
5126 * we don't have any pmap to flush, so let's just skip
5127 * all this.
5128 */
5129 } else if (entry->is_sub_map) {
5130 if (entry->use_pmap) {
5131 #ifndef NO_NESTED_PMAP
5132 pmap_unnest(map->pmap,
5133 (addr64_t)entry->vme_start,
5134 entry->vme_end - entry->vme_start);
5135 #endif /* NO_NESTED_PMAP */
5136 if ((map->mapped) && (map->ref_count)) {
5137 /* clean up parent map/maps */
5138 vm_map_submap_pmap_clean(
5139 map, entry->vme_start,
5140 entry->vme_end,
5141 entry->object.sub_map,
5142 entry->offset);
5143 }
5144 } else {
5145 vm_map_submap_pmap_clean(
5146 map, entry->vme_start, entry->vme_end,
5147 entry->object.sub_map,
5148 entry->offset);
5149 }
5150 } else if (entry->object.vm_object != kernel_object) {
5151 object = entry->object.vm_object;
5152 if((map->mapped) && (map->ref_count)) {
5153 vm_object_pmap_protect(
5154 object, entry->offset,
5155 entry->vme_end - entry->vme_start,
5156 PMAP_NULL,
5157 entry->vme_start,
5158 VM_PROT_NONE);
5159 } else {
5160 pmap_remove(map->pmap,
5161 (addr64_t)entry->vme_start,
5162 (addr64_t)entry->vme_end);
5163 }
5164 }
5165
5166 /*
5167 * All pmap mappings for this map entry must have been
5168 * cleared by now.
5169 */
5170 assert(vm_map_pmap_is_empty(map,
5171 entry->vme_start,
5172 entry->vme_end));
5173
5174 next = entry->vme_next;
5175 s = next->vme_start;
5176 last_timestamp = map->timestamp;
5177
5178 if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
5179 zap_map != VM_MAP_NULL) {
5180 vm_map_size_t entry_size;
5181 /*
5182 * The caller wants to save the affected VM map entries
5183 * into the "zap_map". The caller will take care of
5184 * these entries.
5185 */
5186 /* unlink the entry from "map" ... */
5187 vm_map_entry_unlink(map, entry);
5188 /* ... and add it to the end of the "zap_map" */
5189 vm_map_entry_link(zap_map,
5190 vm_map_last_entry(zap_map),
5191 entry);
5192 entry_size = entry->vme_end - entry->vme_start;
5193 map->size -= entry_size;
5194 zap_map->size += entry_size;
5195 /* we didn't unlock the map, so no timestamp increase */
5196 last_timestamp--;
5197 } else {
5198 vm_map_entry_delete(map, entry);
5199 /* vm_map_entry_delete unlocks the map */
5200 vm_map_lock(map);
5201 }
5202
5203 entry = next;
5204
5205 if(entry == vm_map_to_entry(map)) {
5206 break;
5207 }
5208 if (last_timestamp+1 != map->timestamp) {
5209 /*
5210 * We are responsible for deleting everything
5211 * from the given space. If someone has interfered,
5212 * we pick up where we left off; back-fills should
5213 * be all right for anyone except map_delete, and
5214 * we have to assume that the task has been fully
5215 * disabled before we get here.
5216 */
5217 if (!vm_map_lookup_entry(map, s, &entry)){
5218 entry = entry->vme_next;
5219 s = entry->vme_start;
5220 } else {
5221 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5222 }
5223 /*
5224 * Others can not only allocate behind us; we can also
5225 * see entries coalesce while we don't hold the map lock.
5226 */
5227 if(entry == vm_map_to_entry(map)) {
5228 break;
5229 }
5230 }
5231 last_timestamp = map->timestamp;
5232 }
5233
5234 if (map->wait_for_space)
5235 thread_wakeup((event_t) map);
5236 /*
5237 * wake up anybody waiting on entries that we have already deleted.
5238 */
5239 if (need_wakeup)
5240 vm_map_entry_wakeup(map);
5241
5242 return KERN_SUCCESS;
5243 }
5244
5245 /*
5246 * vm_map_remove:
5247 *
5248 * Remove the given address range from the target map.
5249 * This is the exported form of vm_map_delete.
5250 */
5251 kern_return_t
5252 vm_map_remove(
5253 register vm_map_t map,
5254 register vm_map_offset_t start,
5255 register vm_map_offset_t end,
5256 register boolean_t flags)
5257 {
5258 register kern_return_t result;
5259
5260 vm_map_lock(map);
5261 VM_MAP_RANGE_CHECK(map, start, end);
5262 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
5263 vm_map_unlock(map);
5264
5265 return(result);
5266 }
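/*
 * Illustrative sketch; the flag names come from vm_map.h and the range
 * is an assumption.  A plain removal passes VM_MAP_NO_FLAGS, while a
 * caller holding a kernel wiring on the range adds VM_MAP_REMOVE_KUNWIRE
 * so that vm_map_delete() drops that single wiring instead of waiting
 * for it to go away:
 *
 *	(void) vm_map_remove(map, start, end, VM_MAP_NO_FLAGS);
 *
 *	(void) vm_map_remove(map, start, end, VM_MAP_REMOVE_KUNWIRE);
 */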
5267
5268
5269 /*
5270 * Routine: vm_map_copy_discard
5271 *
5272 * Description:
5273 * Dispose of a map copy object (returned by
5274 * vm_map_copyin).
5275 */
5276 void
5277 vm_map_copy_discard(
5278 vm_map_copy_t copy)
5279 {
5280 if (copy == VM_MAP_COPY_NULL)
5281 return;
5282
5283 switch (copy->type) {
5284 case VM_MAP_COPY_ENTRY_LIST:
5285 while (vm_map_copy_first_entry(copy) !=
5286 vm_map_copy_to_entry(copy)) {
5287 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
5288
5289 vm_map_copy_entry_unlink(copy, entry);
5290 vm_object_deallocate(entry->object.vm_object);
5291 vm_map_copy_entry_dispose(copy, entry);
5292 }
5293 break;
5294 case VM_MAP_COPY_OBJECT:
5295 vm_object_deallocate(copy->cpy_object);
5296 break;
5297 case VM_MAP_COPY_KERNEL_BUFFER:
5298
5299 /*
5300 * The vm_map_copy_t and possibly the data buffer were
5301 * allocated by a single call to kalloc(), i.e. the
5302 * vm_map_copy_t was not allocated out of the zone.
5303 */
5304 kfree(copy, copy->cpy_kalloc_size);
5305 return;
5306 }
5307 zfree(vm_map_copy_zone, copy);
5308 }
5309
5310 /*
5311 * Routine: vm_map_copy_copy
5312 *
5313 * Description:
5314 * Move the information in a map copy object to
5315 * a new map copy object, leaving the old one
5316 * empty.
5317 *
5318 * This is used by kernel routines that need
5319 * to look at out-of-line data (in copyin form)
5320 * before deciding whether to return SUCCESS.
5321 * If the routine returns FAILURE, the original
5322 * copy object will be deallocated; therefore,
5323 * these routines must make a copy of the copy
5324 * object and leave the original empty so that
5325 * deallocation will not fail.
5326 */
5327 vm_map_copy_t
5328 vm_map_copy_copy(
5329 vm_map_copy_t copy)
5330 {
5331 vm_map_copy_t new_copy;
5332
5333 if (copy == VM_MAP_COPY_NULL)
5334 return VM_MAP_COPY_NULL;
5335
5336 /*
5337 * Allocate a new copy object, and copy the information
5338 * from the old one into it.
5339 */
5340
5341 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
5342 *new_copy = *copy;
5343
5344 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
5345 /*
5346 * The links in the entry chain must be
5347 * changed to point to the new copy object.
5348 */
5349 vm_map_copy_first_entry(copy)->vme_prev
5350 = vm_map_copy_to_entry(new_copy);
5351 vm_map_copy_last_entry(copy)->vme_next
5352 = vm_map_copy_to_entry(new_copy);
5353 }
5354
5355 /*
5356 * Change the old copy object into one that contains
5357 * nothing to be deallocated.
5358 */
5359 copy->type = VM_MAP_COPY_OBJECT;
5360 copy->cpy_object = VM_OBJECT_NULL;
5361
5362 /*
5363 * Return the new object.
5364 */
5365 return new_copy;
5366 }
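/*
 * Sketch of the intended usage pattern; inspect_copy() is a hypothetical
 * helper.  A kernel routine that must examine out-of-line data before
 * committing takes over the contents with vm_map_copy_copy(); if the
 * routine then fails, its caller can still discard the original, now
 * empty, copy object without losing the data:
 *
 *	vm_map_copy_t	mine;
 *
 *	mine = vm_map_copy_copy(copy);
 *	if (inspect_copy(mine) != KERN_SUCCESS) {
 *		vm_map_copy_discard(mine);
 *		return KERN_FAILURE;	(caller discards the empty "copy")
 *	}
 */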
5367
5368 static kern_return_t
5369 vm_map_overwrite_submap_recurse(
5370 vm_map_t dst_map,
5371 vm_map_offset_t dst_addr,
5372 vm_map_size_t dst_size)
5373 {
5374 vm_map_offset_t dst_end;
5375 vm_map_entry_t tmp_entry;
5376 vm_map_entry_t entry;
5377 kern_return_t result;
5378 boolean_t encountered_sub_map = FALSE;
5379
5380
5381
5382 /*
5383 * Verify that the destination is all writeable
5384 * initially. We have to trunc the destination
5385 * address and round the copy size or we'll end up
5386 * splitting entries in strange ways.
5387 */
5388
5389 dst_end = vm_map_round_page(dst_addr + dst_size);
5390 vm_map_lock(dst_map);
5391
5392 start_pass_1:
5393 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5394 vm_map_unlock(dst_map);
5395 return(KERN_INVALID_ADDRESS);
5396 }
5397
5398 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
5399 assert(!tmp_entry->use_pmap); /* clipping did unnest if needed */
5400
5401 for (entry = tmp_entry;;) {
5402 vm_map_entry_t next;
5403
5404 next = entry->vme_next;
5405 while(entry->is_sub_map) {
5406 vm_map_offset_t sub_start;
5407 vm_map_offset_t sub_end;
5408 vm_map_offset_t local_end;
5409
5410 if (entry->in_transition) {
5411 /*
5412 * Say that we are waiting, and wait for entry.
5413 */
5414 entry->needs_wakeup = TRUE;
5415 vm_map_entry_wait(dst_map, THREAD_UNINT);
5416
5417 goto start_pass_1;
5418 }
5419
5420 encountered_sub_map = TRUE;
5421 sub_start = entry->offset;
5422
5423 if(entry->vme_end < dst_end)
5424 sub_end = entry->vme_end;
5425 else
5426 sub_end = dst_end;
5427 sub_end -= entry->vme_start;
5428 sub_end += entry->offset;
5429 local_end = entry->vme_end;
5430 vm_map_unlock(dst_map);
5431
5432 result = vm_map_overwrite_submap_recurse(
5433 entry->object.sub_map,
5434 sub_start,
5435 sub_end - sub_start);
5436
5437 if(result != KERN_SUCCESS)
5438 return result;
5439 if (dst_end <= entry->vme_end)
5440 return KERN_SUCCESS;
5441 vm_map_lock(dst_map);
5442 if(!vm_map_lookup_entry(dst_map, local_end,
5443 &tmp_entry)) {
5444 vm_map_unlock(dst_map);
5445 return(KERN_INVALID_ADDRESS);
5446 }
5447 entry = tmp_entry;
5448 next = entry->vme_next;
5449 }
5450
5451 if ( ! (entry->protection & VM_PROT_WRITE)) {
5452 vm_map_unlock(dst_map);
5453 return(KERN_PROTECTION_FAILURE);
5454 }
5455
5456 /*
5457 * If the entry is in transition, we must wait
5458 * for it to exit that state. Anything could happen
5459 * when we unlock the map, so start over.
5460 */
5461 if (entry->in_transition) {
5462
5463 /*
5464 * Say that we are waiting, and wait for entry.
5465 */
5466 entry->needs_wakeup = TRUE;
5467 vm_map_entry_wait(dst_map, THREAD_UNINT);
5468
5469 goto start_pass_1;
5470 }
5471
5472 /*
5473 * our range is contained completely within this map entry
5474 */
5475 if (dst_end <= entry->vme_end) {
5476 vm_map_unlock(dst_map);
5477 return KERN_SUCCESS;
5478 }
5479 /*
5480 * check that range specified is contiguous region
5481 */
5482 if ((next == vm_map_to_entry(dst_map)) ||
5483 (next->vme_start != entry->vme_end)) {
5484 vm_map_unlock(dst_map);
5485 return(KERN_INVALID_ADDRESS);
5486 }
5487
5488 /*
5489 * Check for permanent objects in the destination.
5490 */
5491 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5492 ((!entry->object.vm_object->internal) ||
5493 (entry->object.vm_object->true_share))) {
5494 if(encountered_sub_map) {
5495 vm_map_unlock(dst_map);
5496 return(KERN_FAILURE);
5497 }
5498 }
5499
5500
5501 entry = next;
5502 }/* for */
5503 vm_map_unlock(dst_map);
5504 return(KERN_SUCCESS);
5505 }
5506
5507 /*
5508 * Routine: vm_map_copy_overwrite
5509 *
5510 * Description:
5511 * Copy the memory described by the map copy
5512 * object (copy; returned by vm_map_copyin) onto
5513 * the specified destination region (dst_map, dst_addr).
5514 * The destination must be writeable.
5515 *
5516 * Unlike vm_map_copyout, this routine actually
5517 * writes over previously-mapped memory. If the
5518 * previous mapping was to a permanent (user-supplied)
5519 * memory object, it is preserved.
5520 *
5521 * The attributes (protection and inheritance) of the
5522 * destination region are preserved.
5523 *
5524 * If successful, consumes the copy object.
5525 * Otherwise, the caller is responsible for it.
5526 *
5527 * Implementation notes:
5528 * To overwrite aligned temporary virtual memory, it is
5529 * sufficient to remove the previous mapping and insert
5530 * the new copy. This replacement is done either on
5531 * the whole region (if no permanent virtual memory
5532 * objects are embedded in the destination region) or
5533 * in individual map entries.
5534 *
5535 * To overwrite permanent virtual memory, it is necessary
5536 * to copy each page, as the external memory management
5537 * interface currently does not provide any optimizations.
5538 *
5539 * Unaligned memory also has to be copied. It is possible
5540 * to use 'vm_trickery' to copy the aligned data. This is
5541 * not done but not hard to implement.
5542 *
5543 * Once a page of permanent memory has been overwritten,
5544 * it is impossible to interrupt this function; otherwise,
5545 * the call would be neither atomic nor location-independent.
5546 * The kernel-state portion of a user thread must be
5547 * interruptible.
5548 *
5549 * It may be expensive to forward all requests that might
5550 * overwrite permanent memory (vm_write, vm_copy) to
5551 * uninterruptible kernel threads. This routine may be
5552 * called by interruptible threads; however, success is
5553 * not guaranteed -- if the request cannot be performed
5554 * atomically and interruptibly, an error indication is
5555 * returned.
5556 */
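/*
 * Hedged sketch of a typical caller; the source/destination variables
 * are assumptions.  Copy in a source range, then overwrite an existing
 * writeable destination with it; on success the copy object is
 * consumed, on failure it must be discarded explicitly:
 *
 *	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
 *	if (kr == KERN_SUCCESS) {
 *		kr = vm_map_copy_overwrite(dst_map, dst_addr,
 *					   copy, interruptible);
 *		if (kr != KERN_SUCCESS)
 *			vm_map_copy_discard(copy);
 *	}
 */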
5557
5558 static kern_return_t
5559 vm_map_copy_overwrite_nested(
5560 vm_map_t dst_map,
5561 vm_map_address_t dst_addr,
5562 vm_map_copy_t copy,
5563 boolean_t interruptible,
5564 pmap_t pmap)
5565 {
5566 vm_map_offset_t dst_end;
5567 vm_map_entry_t tmp_entry;
5568 vm_map_entry_t entry;
5569 kern_return_t kr;
5570 boolean_t aligned = TRUE;
5571 boolean_t contains_permanent_objects = FALSE;
5572 boolean_t encountered_sub_map = FALSE;
5573 vm_map_offset_t base_addr;
5574 vm_map_size_t copy_size;
5575 vm_map_size_t total_size;
5576
5577
5578 /*
5579 * Check for null copy object.
5580 */
5581
5582 if (copy == VM_MAP_COPY_NULL)
5583 return(KERN_SUCCESS);
5584
5585 /*
5586 * Check for special kernel buffer allocated
5587 * by new_ipc_kmsg_copyin.
5588 */
5589
5590 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
5591 return(vm_map_copyout_kernel_buffer(
5592 dst_map, &dst_addr,
5593 copy, TRUE));
5594 }
5595
5596 /*
5597 * Only works for entry lists at the moment. Will
5598 * support page lists later.
5599 */
5600
5601 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
5602
5603 if (copy->size == 0) {
5604 vm_map_copy_discard(copy);
5605 return(KERN_SUCCESS);
5606 }
5607
5608 /*
5609 * Verify that the destination is all writeable
5610 * initially. We have to trunc the destination
5611 * address and round the copy size or we'll end up
5612 * splitting entries in strange ways.
5613 */
5614
5615 if (!page_aligned(copy->size) ||
5616 !page_aligned (copy->offset) ||
5617 !page_aligned (dst_addr))
5618 {
5619 aligned = FALSE;
5620 dst_end = vm_map_round_page(dst_addr + copy->size);
5621 } else {
5622 dst_end = dst_addr + copy->size;
5623 }
5624
5625 vm_map_lock(dst_map);
5626
5627 /* LP64todo - remove this check when vm_map_commpage64()
5628 * no longer has to stuff in a map_entry for the commpage
5629 * above the map's max_offset.
5630 */
5631 if (dst_addr >= dst_map->max_offset) {
5632 vm_map_unlock(dst_map);
5633 return(KERN_INVALID_ADDRESS);
5634 }
5635
5636 start_pass_1:
5637 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5638 vm_map_unlock(dst_map);
5639 return(KERN_INVALID_ADDRESS);
5640 }
5641 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
5642 for (entry = tmp_entry;;) {
5643 vm_map_entry_t next = entry->vme_next;
5644
5645 while(entry->is_sub_map) {
5646 vm_map_offset_t sub_start;
5647 vm_map_offset_t sub_end;
5648 vm_map_offset_t local_end;
5649
5650 if (entry->in_transition) {
5651
5652 /*
5653 * Say that we are waiting, and wait for entry.
5654 */
5655 entry->needs_wakeup = TRUE;
5656 vm_map_entry_wait(dst_map, THREAD_UNINT);
5657
5658 goto start_pass_1;
5659 }
5660
5661 local_end = entry->vme_end;
5662 if (!(entry->needs_copy)) {
5663 /* if needs_copy we are a COW submap */
5664 /* in such a case we just replace so */
5665 /* there is no need for the follow- */
5666 /* ing check. */
5667 encountered_sub_map = TRUE;
5668 sub_start = entry->offset;
5669
5670 if(entry->vme_end < dst_end)
5671 sub_end = entry->vme_end;
5672 else
5673 sub_end = dst_end;
5674 sub_end -= entry->vme_start;
5675 sub_end += entry->offset;
5676 vm_map_unlock(dst_map);
5677
5678 kr = vm_map_overwrite_submap_recurse(
5679 entry->object.sub_map,
5680 sub_start,
5681 sub_end - sub_start);
5682 if(kr != KERN_SUCCESS)
5683 return kr;
5684 vm_map_lock(dst_map);
5685 }
5686
5687 if (dst_end <= entry->vme_end)
5688 goto start_overwrite;
5689 if(!vm_map_lookup_entry(dst_map, local_end,
5690 &entry)) {
5691 vm_map_unlock(dst_map);
5692 return(KERN_INVALID_ADDRESS);
5693 }
5694 next = entry->vme_next;
5695 }
5696
5697 if ( ! (entry->protection & VM_PROT_WRITE)) {
5698 vm_map_unlock(dst_map);
5699 return(KERN_PROTECTION_FAILURE);
5700 }
5701
5702 /*
5703 * If the entry is in transition, we must wait
5704 * for it to exit that state. Anything could happen
5705 * when we unlock the map, so start over.
5706 */
5707 if (entry->in_transition) {
5708
5709 /*
5710 * Say that we are waiting, and wait for entry.
5711 */
5712 entry->needs_wakeup = TRUE;
5713 vm_map_entry_wait(dst_map, THREAD_UNINT);
5714
5715 goto start_pass_1;
5716 }
5717
5718 /*
5719 * our range is contained completely within this map entry
5720 */
5721 if (dst_end <= entry->vme_end)
5722 break;
5723 /*
5724 * check that range specified is contiguous region
5725 */
5726 if ((next == vm_map_to_entry(dst_map)) ||
5727 (next->vme_start != entry->vme_end)) {
5728 vm_map_unlock(dst_map);
5729 return(KERN_INVALID_ADDRESS);
5730 }
5731
5732
5733 /*
5734 * Check for permanent objects in the destination.
5735 */
5736 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5737 ((!entry->object.vm_object->internal) ||
5738 (entry->object.vm_object->true_share))) {
5739 contains_permanent_objects = TRUE;
5740 }
5741
5742 entry = next;
5743 }/* for */
5744
5745 start_overwrite:
5746 /*
5747 * If there are permanent objects in the destination, then
5748 * the copy cannot be interrupted.
5749 */
5750
5751 if (interruptible && contains_permanent_objects) {
5752 vm_map_unlock(dst_map);
5753 return(KERN_FAILURE); /* XXX */
5754 }
5755
5756 /*
5757 *
5758 * Make a second pass, overwriting the data
5759 * At the beginning of each loop iteration,
5760 * the next entry to be overwritten is "tmp_entry"
5761 * (initially, the value returned from the lookup above),
5762 * and the starting address expected in that entry
5763 * is "start".
5764 */
5765
5766 total_size = copy->size;
5767 if(encountered_sub_map) {
5768 copy_size = 0;
5769 /* re-calculate tmp_entry since we've had the map */
5770 /* unlocked */
5771 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
5772 vm_map_unlock(dst_map);
5773 return(KERN_INVALID_ADDRESS);
5774 }
5775 } else {
5776 copy_size = copy->size;
5777 }
5778
5779 base_addr = dst_addr;
5780 while(TRUE) {
5781 /* deconstruct the copy object and do in parts */
5782 /* only in the sub_map, interruptible case */
5783 vm_map_entry_t copy_entry;
5784 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
5785 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
5786 int nentries;
5787 int remaining_entries = 0;
5788 vm_map_offset_t new_offset = 0;
5789
5790 for (entry = tmp_entry; copy_size == 0;) {
5791 vm_map_entry_t next;
5792
5793 next = entry->vme_next;
5794
5795 /* tmp_entry and base address are moved along */
5796 /* each time we encounter a sub-map. Otherwise */
5797 /* entry can outpace tmp_entry, and the copy_size */
5798 /* may reflect the distance between them. */
5799 /* If the current entry is found to be in transition, */
5800 /* we will start over at the beginning or at the last */
5801 /* encounter of a submap, as dictated by base_addr; */
5802 /* we will zero copy_size accordingly. */
5803 if (entry->in_transition) {
5804 /*
5805 * Say that we are waiting, and wait for entry.
5806 */
5807 entry->needs_wakeup = TRUE;
5808 vm_map_entry_wait(dst_map, THREAD_UNINT);
5809
5810 if(!vm_map_lookup_entry(dst_map, base_addr,
5811 &tmp_entry)) {
5812 vm_map_unlock(dst_map);
5813 return(KERN_INVALID_ADDRESS);
5814 }
5815 copy_size = 0;
5816 entry = tmp_entry;
5817 continue;
5818 }
5819 if(entry->is_sub_map) {
5820 vm_map_offset_t sub_start;
5821 vm_map_offset_t sub_end;
5822 vm_map_offset_t local_end;
5823
5824 if (entry->needs_copy) {
5825 /* if this is a COW submap */
5826 /* just back the range with an */
5827 /* anonymous entry */
5828 if(entry->vme_end < dst_end)
5829 sub_end = entry->vme_end;
5830 else
5831 sub_end = dst_end;
5832 if(entry->vme_start < base_addr)
5833 sub_start = base_addr;
5834 else
5835 sub_start = entry->vme_start;
5836 vm_map_clip_end(
5837 dst_map, entry, sub_end);
5838 vm_map_clip_start(
5839 dst_map, entry, sub_start);
5840 assert(!entry->use_pmap);
5841 entry->is_sub_map = FALSE;
5842 vm_map_deallocate(
5843 entry->object.sub_map);
5844 entry->object.sub_map = NULL;
5845 entry->is_shared = FALSE;
5846 entry->needs_copy = FALSE;
5847 entry->offset = 0;
5848 /*
5849 * XXX FBDP
5850 * We should propagate the protections
5851 * of the submap entry here instead
5852 * of forcing them to VM_PROT_ALL...
5853 * Or better yet, we should inherit
5854 * the protection of the copy_entry.
5855 */
5856 entry->protection = VM_PROT_ALL;
5857 entry->max_protection = VM_PROT_ALL;
5858 entry->wired_count = 0;
5859 entry->user_wired_count = 0;
5860 if(entry->inheritance
5861 == VM_INHERIT_SHARE)
5862 entry->inheritance = VM_INHERIT_COPY;
5863 continue;
5864 }
5865 /* first take care of any non-sub_map */
5866 /* entries to send */
5867 if(base_addr < entry->vme_start) {
5868 /* stuff to send */
5869 copy_size =
5870 entry->vme_start - base_addr;
5871 break;
5872 }
5873 sub_start = entry->offset;
5874
5875 if(entry->vme_end < dst_end)
5876 sub_end = entry->vme_end;
5877 else
5878 sub_end = dst_end;
5879 sub_end -= entry->vme_start;
5880 sub_end += entry->offset;
5881 local_end = entry->vme_end;
5882 vm_map_unlock(dst_map);
5883 copy_size = sub_end - sub_start;
5884
5885 /* adjust the copy object */
5886 if (total_size > copy_size) {
5887 vm_map_size_t local_size = 0;
5888 vm_map_size_t entry_size;
5889
5890 nentries = 1;
5891 new_offset = copy->offset;
5892 copy_entry = vm_map_copy_first_entry(copy);
5893 while(copy_entry !=
5894 vm_map_copy_to_entry(copy)){
5895 entry_size = copy_entry->vme_end -
5896 copy_entry->vme_start;
5897 if((local_size < copy_size) &&
5898 ((local_size + entry_size)
5899 >= copy_size)) {
5900 vm_map_copy_clip_end(copy,
5901 copy_entry,
5902 copy_entry->vme_start +
5903 (copy_size - local_size));
5904 entry_size = copy_entry->vme_end -
5905 copy_entry->vme_start;
5906 local_size += entry_size;
5907 new_offset += entry_size;
5908 }
5909 if(local_size >= copy_size) {
5910 next_copy = copy_entry->vme_next;
5911 copy_entry->vme_next =
5912 vm_map_copy_to_entry(copy);
5913 previous_prev =
5914 copy->cpy_hdr.links.prev;
5915 copy->cpy_hdr.links.prev = copy_entry;
5916 copy->size = copy_size;
5917 remaining_entries =
5918 copy->cpy_hdr.nentries;
5919 remaining_entries -= nentries;
5920 copy->cpy_hdr.nentries = nentries;
5921 break;
5922 } else {
5923 local_size += entry_size;
5924 new_offset += entry_size;
5925 nentries++;
5926 }
5927 copy_entry = copy_entry->vme_next;
5928 }
5929 }
5930
5931 if((entry->use_pmap) && (pmap == NULL)) {
5932 kr = vm_map_copy_overwrite_nested(
5933 entry->object.sub_map,
5934 sub_start,
5935 copy,
5936 interruptible,
5937 entry->object.sub_map->pmap);
5938 } else if (pmap != NULL) {
5939 kr = vm_map_copy_overwrite_nested(
5940 entry->object.sub_map,
5941 sub_start,
5942 copy,
5943 interruptible, pmap);
5944 } else {
5945 kr = vm_map_copy_overwrite_nested(
5946 entry->object.sub_map,
5947 sub_start,
5948 copy,
5949 interruptible,
5950 dst_map->pmap);
5951 }
5952 if(kr != KERN_SUCCESS) {
5953 if(next_copy != NULL) {
5954 copy->cpy_hdr.nentries +=
5955 remaining_entries;
5956 copy->cpy_hdr.links.prev->vme_next =
5957 next_copy;
5958 copy->cpy_hdr.links.prev
5959 = previous_prev;
5960 copy->size = total_size;
5961 }
5962 return kr;
5963 }
5964 if (dst_end <= local_end) {
5965 return(KERN_SUCCESS);
5966 }
5967 /* otherwise copy no longer exists, it was */
5968 /* destroyed after successful copy_overwrite */
5969 copy = (vm_map_copy_t)
5970 zalloc(vm_map_copy_zone);
5971 vm_map_copy_first_entry(copy) =
5972 vm_map_copy_last_entry(copy) =
5973 vm_map_copy_to_entry(copy);
5974 copy->type = VM_MAP_COPY_ENTRY_LIST;
5975 copy->offset = new_offset;
5976
5977 total_size -= copy_size;
5978 copy_size = 0;
5979 /* put back remainder of copy in container */
5980 if(next_copy != NULL) {
5981 copy->cpy_hdr.nentries = remaining_entries;
5982 copy->cpy_hdr.links.next = next_copy;
5983 copy->cpy_hdr.links.prev = previous_prev;
5984 copy->size = total_size;
5985 next_copy->vme_prev =
5986 vm_map_copy_to_entry(copy);
5987 next_copy = NULL;
5988 }
5989 base_addr = local_end;
5990 vm_map_lock(dst_map);
5991 if(!vm_map_lookup_entry(dst_map,
5992 local_end, &tmp_entry)) {
5993 vm_map_unlock(dst_map);
5994 return(KERN_INVALID_ADDRESS);
5995 }
5996 entry = tmp_entry;
5997 continue;
5998 }
5999 if (dst_end <= entry->vme_end) {
6000 copy_size = dst_end - base_addr;
6001 break;
6002 }
6003
6004 if ((next == vm_map_to_entry(dst_map)) ||
6005 (next->vme_start != entry->vme_end)) {
6006 vm_map_unlock(dst_map);
6007 return(KERN_INVALID_ADDRESS);
6008 }
6009
6010 entry = next;
6011 }/* for */
6012
6013 next_copy = NULL;
6014 nentries = 1;
6015
6016 /* adjust the copy object */
6017 if (total_size > copy_size) {
6018 vm_map_size_t local_size = 0;
6019 vm_map_size_t entry_size;
6020
6021 new_offset = copy->offset;
6022 copy_entry = vm_map_copy_first_entry(copy);
6023 while(copy_entry != vm_map_copy_to_entry(copy)) {
6024 entry_size = copy_entry->vme_end -
6025 copy_entry->vme_start;
6026 if((local_size < copy_size) &&
6027 ((local_size + entry_size)
6028 >= copy_size)) {
6029 vm_map_copy_clip_end(copy, copy_entry,
6030 copy_entry->vme_start +
6031 (copy_size - local_size));
6032 entry_size = copy_entry->vme_end -
6033 copy_entry->vme_start;
6034 local_size += entry_size;
6035 new_offset += entry_size;
6036 }
6037 if(local_size >= copy_size) {
6038 next_copy = copy_entry->vme_next;
6039 copy_entry->vme_next =
6040 vm_map_copy_to_entry(copy);
6041 previous_prev =
6042 copy->cpy_hdr.links.prev;
6043 copy->cpy_hdr.links.prev = copy_entry;
6044 copy->size = copy_size;
6045 remaining_entries =
6046 copy->cpy_hdr.nentries;
6047 remaining_entries -= nentries;
6048 copy->cpy_hdr.nentries = nentries;
6049 break;
6050 } else {
6051 local_size += entry_size;
6052 new_offset += entry_size;
6053 nentries++;
6054 }
6055 copy_entry = copy_entry->vme_next;
6056 }
6057 }
6058
6059 if (aligned) {
6060 pmap_t local_pmap;
6061
6062 if(pmap)
6063 local_pmap = pmap;
6064 else
6065 local_pmap = dst_map->pmap;
6066
6067 if ((kr = vm_map_copy_overwrite_aligned(
6068 dst_map, tmp_entry, copy,
6069 base_addr, local_pmap)) != KERN_SUCCESS) {
6070 if(next_copy != NULL) {
6071 copy->cpy_hdr.nentries +=
6072 remaining_entries;
6073 copy->cpy_hdr.links.prev->vme_next =
6074 next_copy;
6075 copy->cpy_hdr.links.prev =
6076 previous_prev;
6077 copy->size += copy_size;
6078 }
6079 return kr;
6080 }
6081 vm_map_unlock(dst_map);
6082 } else {
6083 /*
6084 * Performance gain:
6085 *
6086 * if the copy and dst address are misaligned but have the same
6087 * offset within the page, we can copy_not_aligned the
6088 * misaligned parts and copy aligned the rest. If they are
6089 * aligned but len is unaligned, we simply need to copy
6090 * the end bit unaligned. We'll need to split the misaligned
6091 * bits of the region in this case!
6092 */
6093 /* ALWAYS UNLOCKS THE dst_map MAP */
6094 if ((kr = vm_map_copy_overwrite_unaligned( dst_map,
6095 tmp_entry, copy, base_addr)) != KERN_SUCCESS) {
6096 if(next_copy != NULL) {
6097 copy->cpy_hdr.nentries +=
6098 remaining_entries;
6099 copy->cpy_hdr.links.prev->vme_next =
6100 next_copy;
6101 copy->cpy_hdr.links.prev =
6102 previous_prev;
6103 copy->size += copy_size;
6104 }
6105 return kr;
6106 }
6107 }
6108 total_size -= copy_size;
6109 if(total_size == 0)
6110 break;
6111 base_addr += copy_size;
6112 copy_size = 0;
6113 copy->offset = new_offset;
6114 if(next_copy != NULL) {
6115 copy->cpy_hdr.nentries = remaining_entries;
6116 copy->cpy_hdr.links.next = next_copy;
6117 copy->cpy_hdr.links.prev = previous_prev;
6118 next_copy->vme_prev = vm_map_copy_to_entry(copy);
6119 copy->size = total_size;
6120 }
6121 vm_map_lock(dst_map);
6122 while(TRUE) {
6123 if (!vm_map_lookup_entry(dst_map,
6124 base_addr, &tmp_entry)) {
6125 vm_map_unlock(dst_map);
6126 return(KERN_INVALID_ADDRESS);
6127 }
6128 if (tmp_entry->in_transition) {
6129 entry->needs_wakeup = TRUE;
6130 vm_map_entry_wait(dst_map, THREAD_UNINT);
6131 } else {
6132 break;
6133 }
6134 }
6135 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(base_addr));
6136
6137 entry = tmp_entry;
6138 } /* while */
6139
6140 /*
6141 * Throw away the vm_map_copy object
6142 */
6143 vm_map_copy_discard(copy);
6144
6145 return(KERN_SUCCESS);
6146 }/* vm_map_copy_overwrite */
6147
6148 kern_return_t
6149 vm_map_copy_overwrite(
6150 vm_map_t dst_map,
6151 vm_map_offset_t dst_addr,
6152 vm_map_copy_t copy,
6153 boolean_t interruptible)
6154 {
6155 return vm_map_copy_overwrite_nested(
6156 dst_map, dst_addr, copy, interruptible, (pmap_t) NULL);
6157 }
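
/*
 * Illustrative sketch, not part of the original source: one way a caller
 * might pair vm_map_copyin() with the vm_map_copy_overwrite() wrapper
 * above to replace the contents of an existing, writable range in the
 * same map.  The helper name is hypothetical, and the assumption that a
 * failed overwrite leaves the copy object for the caller to discard
 * follows the convention stated for vm_map_copyout() below.
 */
static kern_return_t
example_overwrite_range(
	vm_map_t		map,
	vm_map_address_t	src_addr,
	vm_map_offset_t		dst_addr,
	vm_map_size_t		len)
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	/* capture the source range; do not destroy the original */
	kr = vm_map_copyin(map, src_addr, len, FALSE, &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	/* overwrite the destination in place, non-interruptibly */
	kr = vm_map_copy_overwrite(map, dst_addr, copy, FALSE);
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(copy);	/* assumed still ours on failure */
	return kr;
}
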
6158
6159
6160 /*
6161 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
6162 *
6163 * Description:
6164 * Physically copy unaligned data
6165 *
6166 * Implementation:
6167 * Unaligned parts of pages have to be physically copied. We use
6168 * a modified form of vm_fault_copy (which understands none-aligned
6169 * page offsets and sizes) to do the copy. We attempt to copy as
6170 * much memory in one go as possibly, however vm_fault_copy copies
6171 * within 1 memory object so we have to find the smaller of "amount left"
6172 * "source object data size" and "target object data size". With
6173 * unaligned data we don't need to split regions, therefore the source
6174 * (copy) object should be one map entry, the target range may be split
6175 * over multiple map entries however. In any event we are pessimistic
6176 * about these assumptions.
6177 *
6178 * Assumptions:
6179 * dst_map is locked on entry and is returned locked on success,
6180 * unlocked on error.
6181 */
6182
6183 static kern_return_t
6184 vm_map_copy_overwrite_unaligned(
6185 vm_map_t dst_map,
6186 vm_map_entry_t entry,
6187 vm_map_copy_t copy,
6188 vm_map_offset_t start)
6189 {
6190 vm_map_entry_t copy_entry = vm_map_copy_first_entry(copy);
6191 vm_map_version_t version;
6192 vm_object_t dst_object;
6193 vm_object_offset_t dst_offset;
6194 vm_object_offset_t src_offset;
6195 vm_object_offset_t entry_offset;
6196 vm_map_offset_t entry_end;
6197 vm_map_size_t src_size,
6198 dst_size,
6199 copy_size,
6200 amount_left;
6201 kern_return_t kr = KERN_SUCCESS;
6202
6203 vm_map_lock_write_to_read(dst_map);
6204
6205 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
6206 amount_left = copy->size;
6207 /*
6208 * unaligned, so we never clipped this entry; we need the offset into
6209 * the vm_object, not just the data.
6210 */
6211 while (amount_left > 0) {
6212
6213 if (entry == vm_map_to_entry(dst_map)) {
6214 vm_map_unlock_read(dst_map);
6215 return KERN_INVALID_ADDRESS;
6216 }
6217
6218 /* "start" must be within the current map entry */
6219 assert ((start>=entry->vme_start) && (start<entry->vme_end));
6220
6221 dst_offset = start - entry->vme_start;
6222
6223 dst_size = entry->vme_end - start;
6224
6225 src_size = copy_entry->vme_end -
6226 (copy_entry->vme_start + src_offset);
6227
6228 if (dst_size < src_size) {
6229 /*
6230 * we can only copy dst_size bytes before
6231 * we have to get the next destination entry
6232 */
6233 copy_size = dst_size;
6234 } else {
6235 /*
6236 * we can only copy src_size bytes before
6237 * we have to get the next source copy entry
6238 */
6239 copy_size = src_size;
6240 }
6241
6242 if (copy_size > amount_left) {
6243 copy_size = amount_left;
6244 }
6245 /*
6246 * Entry needs copy: create a shadow object for the
6247 * copy-on-write region.
6248 */
6249 if (entry->needs_copy &&
6250 ((entry->protection & VM_PROT_WRITE) != 0))
6251 {
6252 if (vm_map_lock_read_to_write(dst_map)) {
6253 vm_map_lock_read(dst_map);
6254 goto RetryLookup;
6255 }
6256 vm_object_shadow(&entry->object.vm_object,
6257 &entry->offset,
6258 (vm_map_size_t)(entry->vme_end
6259 - entry->vme_start));
6260 entry->needs_copy = FALSE;
6261 vm_map_lock_write_to_read(dst_map);
6262 }
6263 dst_object = entry->object.vm_object;
6264 /*
6265 * Unlike with the virtual (aligned) copy, we're going
6266 * to fault on it; therefore we need a target object.
6267 */
6268 if (dst_object == VM_OBJECT_NULL) {
6269 if (vm_map_lock_read_to_write(dst_map)) {
6270 vm_map_lock_read(dst_map);
6271 goto RetryLookup;
6272 }
6273 dst_object = vm_object_allocate((vm_map_size_t)
6274 entry->vme_end - entry->vme_start);
6275 entry->object.vm_object = dst_object;
6276 entry->offset = 0;
6277 vm_map_lock_write_to_read(dst_map);
6278 }
6279 /*
6280 * Take an object reference and unlock map. The "entry" may
6281 * disappear or change when the map is unlocked.
6282 */
6283 vm_object_reference(dst_object);
6284 version.main_timestamp = dst_map->timestamp;
6285 entry_offset = entry->offset;
6286 entry_end = entry->vme_end;
6287 vm_map_unlock_read(dst_map);
6288 /*
6289 * Copy as much as possible in one pass
6290 */
6291 kr = vm_fault_copy(
6292 copy_entry->object.vm_object,
6293 copy_entry->offset + src_offset,
6294 &copy_size,
6295 dst_object,
6296 entry_offset + dst_offset,
6297 dst_map,
6298 &version,
6299 THREAD_UNINT );
6300
6301 start += copy_size;
6302 src_offset += copy_size;
6303 amount_left -= copy_size;
6304 /*
6305 * Release the object reference
6306 */
6307 vm_object_deallocate(dst_object);
6308 /*
6309 * If a hard error occurred, return it now
6310 */
6311 if (kr != KERN_SUCCESS)
6312 return kr;
6313
6314 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
6315 || amount_left == 0)
6316 {
6317 /*
6318 * all done with this copy entry, dispose.
6319 */
6320 vm_map_copy_entry_unlink(copy, copy_entry);
6321 vm_object_deallocate(copy_entry->object.vm_object);
6322 vm_map_copy_entry_dispose(copy, copy_entry);
6323
6324 if ((copy_entry = vm_map_copy_first_entry(copy))
6325 == vm_map_copy_to_entry(copy) && amount_left) {
6326 /*
6327 * not finished copying but ran out of source
6328 */
6329 return KERN_INVALID_ADDRESS;
6330 }
6331 src_offset = 0;
6332 }
6333
6334 if (amount_left == 0)
6335 return KERN_SUCCESS;
6336
6337 vm_map_lock_read(dst_map);
6338 if (version.main_timestamp == dst_map->timestamp) {
6339 if (start == entry_end) {
6340 /*
6341 * destination region is split. Use the version
6342 * information to avoid a lookup in the normal
6343 * case.
6344 */
6345 entry = entry->vme_next;
6346 /*
6347 * should be contiguous. Fail if we encounter
6348 * a hole in the destination.
6349 */
6350 if (start != entry->vme_start) {
6351 vm_map_unlock_read(dst_map);
6352 return KERN_INVALID_ADDRESS ;
6353 }
6354 }
6355 } else {
6356 /*
6357 * Map version check failed.
6358 * We must look up the entry because somebody
6359 * might have changed the map behind our backs.
6360 */
6361 RetryLookup:
6362 if (!vm_map_lookup_entry(dst_map, start, &entry))
6363 {
6364 vm_map_unlock_read(dst_map);
6365 return KERN_INVALID_ADDRESS ;
6366 }
6367 }
6368 }/* while */
6369
6370 return KERN_SUCCESS;
6371 }/* vm_map_copy_overwrite_unaligned */
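
/*
 * Minimal sketch, not in the original source, of how the unaligned path
 * above sizes each pass: it copies the smallest of the room left in the
 * current destination entry, the data left in the current source copy
 * entry, and the total amount still to be copied.  The helper name is
 * hypothetical.
 */
static vm_map_size_t
example_unaligned_pass_size(
	vm_map_size_t	dst_size,	/* entry->vme_end - start */
	vm_map_size_t	src_size,	/* copy entry bytes past src_offset */
	vm_map_size_t	amount_left)
{
	vm_map_size_t	copy_size;

	copy_size = (dst_size < src_size) ? dst_size : src_size;
	if (copy_size > amount_left)
		copy_size = amount_left;
	return copy_size;
}
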
6372
6373 /*
6374 * Routine: vm_map_copy_overwrite_aligned [internal use only]
6375 *
6376 * Description:
6377 * Does all the vm_trickery possible for whole pages.
6378 *
6379 * Implementation:
6380 *
6381 * If there are no permanent objects in the destination,
6382 * and the source and destination map entry zones match,
6383 * and the destination map entry is not shared,
6384 * then the map entries can be deleted and replaced
6385 * with those from the copy. The following code is the
6386 * basic idea of what to do, but there are lots of annoying
6387 * little details about getting protection and inheritance
6388 * right. Should add protection, inheritance, and sharing checks
6389 * to the above pass and make sure that no wiring is involved.
6390 */
6391
6392 static kern_return_t
6393 vm_map_copy_overwrite_aligned(
6394 vm_map_t dst_map,
6395 vm_map_entry_t tmp_entry,
6396 vm_map_copy_t copy,
6397 vm_map_offset_t start,
6398 __unused pmap_t pmap)
6399 {
6400 vm_object_t object;
6401 vm_map_entry_t copy_entry;
6402 vm_map_size_t copy_size;
6403 vm_map_size_t size;
6404 vm_map_entry_t entry;
6405
6406 while ((copy_entry = vm_map_copy_first_entry(copy))
6407 != vm_map_copy_to_entry(copy))
6408 {
6409 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
6410
6411 entry = tmp_entry;
6412 assert(!entry->use_pmap); /* unnested when clipped earlier */
6413 if (entry == vm_map_to_entry(dst_map)) {
6414 vm_map_unlock(dst_map);
6415 return KERN_INVALID_ADDRESS;
6416 }
6417 size = (entry->vme_end - entry->vme_start);
6418 /*
6419 * Make sure that no holes popped up in the
6420 * address map, and that the protection is
6421 * still valid, in case the map was unlocked
6422 * earlier.
6423 */
6424
6425 if ((entry->vme_start != start) || ((entry->is_sub_map)
6426 && !entry->needs_copy)) {
6427 vm_map_unlock(dst_map);
6428 return(KERN_INVALID_ADDRESS);
6429 }
6430 assert(entry != vm_map_to_entry(dst_map));
6431
6432 /*
6433 * Check protection again
6434 */
6435
6436 if ( ! (entry->protection & VM_PROT_WRITE)) {
6437 vm_map_unlock(dst_map);
6438 return(KERN_PROTECTION_FAILURE);
6439 }
6440
6441 /*
6442 * Adjust to source size first
6443 */
6444
6445 if (copy_size < size) {
6446 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
6447 size = copy_size;
6448 }
6449
6450 /*
6451 * Adjust to destination size
6452 */
6453
6454 if (size < copy_size) {
6455 vm_map_copy_clip_end(copy, copy_entry,
6456 copy_entry->vme_start + size);
6457 copy_size = size;
6458 }
6459
6460 assert((entry->vme_end - entry->vme_start) == size);
6461 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
6462 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
6463
6464 /*
6465 * If the destination contains temporary unshared memory,
6466 * we can perform the copy by throwing it away and
6467 * installing the source data.
6468 */
6469
6470 object = entry->object.vm_object;
6471 if ((!entry->is_shared &&
6472 ((object == VM_OBJECT_NULL) ||
6473 (object->internal && !object->true_share))) ||
6474 entry->needs_copy) {
6475 vm_object_t old_object = entry->object.vm_object;
6476 vm_object_offset_t old_offset = entry->offset;
6477 vm_object_offset_t offset;
6478
6479 /*
6480 * Ensure that the source and destination aren't
6481 * identical
6482 */
6483 if (old_object == copy_entry->object.vm_object &&
6484 old_offset == copy_entry->offset) {
6485 vm_map_copy_entry_unlink(copy, copy_entry);
6486 vm_map_copy_entry_dispose(copy, copy_entry);
6487
6488 if (old_object != VM_OBJECT_NULL)
6489 vm_object_deallocate(old_object);
6490
6491 start = tmp_entry->vme_end;
6492 tmp_entry = tmp_entry->vme_next;
6493 continue;
6494 }
6495
6496 if (old_object != VM_OBJECT_NULL) {
6497 if(entry->is_sub_map) {
6498 if(entry->use_pmap) {
6499 #ifndef NO_NESTED_PMAP
6500 pmap_unnest(dst_map->pmap,
6501 (addr64_t)entry->vme_start,
6502 entry->vme_end - entry->vme_start);
6503 #endif /* NO_NESTED_PMAP */
6504 if(dst_map->mapped) {
6505 /* clean up parent */
6506 /* map/maps */
6507 vm_map_submap_pmap_clean(
6508 dst_map, entry->vme_start,
6509 entry->vme_end,
6510 entry->object.sub_map,
6511 entry->offset);
6512 }
6513 } else {
6514 vm_map_submap_pmap_clean(
6515 dst_map, entry->vme_start,
6516 entry->vme_end,
6517 entry->object.sub_map,
6518 entry->offset);
6519 }
6520 vm_map_deallocate(
6521 entry->object.sub_map);
6522 } else {
6523 if(dst_map->mapped) {
6524 vm_object_pmap_protect(
6525 entry->object.vm_object,
6526 entry->offset,
6527 entry->vme_end
6528 - entry->vme_start,
6529 PMAP_NULL,
6530 entry->vme_start,
6531 VM_PROT_NONE);
6532 } else {
6533 pmap_remove(dst_map->pmap,
6534 (addr64_t)(entry->vme_start),
6535 (addr64_t)(entry->vme_end));
6536 }
6537 vm_object_deallocate(old_object);
6538 }
6539 }
6540
6541 entry->is_sub_map = FALSE;
6542 entry->object = copy_entry->object;
6543 object = entry->object.vm_object;
6544 entry->needs_copy = copy_entry->needs_copy;
6545 entry->wired_count = 0;
6546 entry->user_wired_count = 0;
6547 offset = entry->offset = copy_entry->offset;
6548
6549 vm_map_copy_entry_unlink(copy, copy_entry);
6550 vm_map_copy_entry_dispose(copy, copy_entry);
6551
6552 /*
6553 * we could try to push pages into the pmap at this point, BUT
6554 * this optimization only saved on average 2 us per page if ALL
6555 * the pages in the source were currently mapped
6556 * and ALL the pages in the dest were touched; if fewer
6557 * than 2/3 of the pages were touched, this optimization actually cost more cycles.
6558 * It also puts a lot of pressure on the pmap layer w/r/t mapping structures.
6559 */
6560
6561 /*
6562 * Set up for the next iteration. The map
6563 * has not been unlocked, so the next
6564 * address should be at the end of this
6565 * entry, and the next map entry should be
6566 * the one following it.
6567 */
6568
6569 start = tmp_entry->vme_end;
6570 tmp_entry = tmp_entry->vme_next;
6571 } else {
6572 vm_map_version_t version;
6573 vm_object_t dst_object = entry->object.vm_object;
6574 vm_object_offset_t dst_offset = entry->offset;
6575 kern_return_t r;
6576
6577 /*
6578 * Take an object reference, and record
6579 * the map version information so that the
6580 * map can be safely unlocked.
6581 */
6582
6583 vm_object_reference(dst_object);
6584
6585 /* account for unlock bumping up timestamp */
6586 version.main_timestamp = dst_map->timestamp + 1;
6587
6588 vm_map_unlock(dst_map);
6589
6590 /*
6591 * Copy as much as possible in one pass
6592 */
6593
6594 copy_size = size;
6595 r = vm_fault_copy(
6596 copy_entry->object.vm_object,
6597 copy_entry->offset,
6598 &copy_size,
6599 dst_object,
6600 dst_offset,
6601 dst_map,
6602 &version,
6603 THREAD_UNINT );
6604
6605 /*
6606 * Release the object reference
6607 */
6608
6609 vm_object_deallocate(dst_object);
6610
6611 /*
6612 * If a hard error occurred, return it now
6613 */
6614
6615 if (r != KERN_SUCCESS)
6616 return(r);
6617
6618 if (copy_size != 0) {
6619 /*
6620 * Dispose of the copied region
6621 */
6622
6623 vm_map_copy_clip_end(copy, copy_entry,
6624 copy_entry->vme_start + copy_size);
6625 vm_map_copy_entry_unlink(copy, copy_entry);
6626 vm_object_deallocate(copy_entry->object.vm_object);
6627 vm_map_copy_entry_dispose(copy, copy_entry);
6628 }
6629
6630 /*
6631 * Pick up in the destination map where we left off.
6632 *
6633 * Use the version information to avoid a lookup
6634 * in the normal case.
6635 */
6636
6637 start += copy_size;
6638 vm_map_lock(dst_map);
6639 if (version.main_timestamp == dst_map->timestamp) {
6640 /* We can safely use saved tmp_entry value */
6641
6642 vm_map_clip_end(dst_map, tmp_entry, start);
6643 tmp_entry = tmp_entry->vme_next;
6644 } else {
6645 /* Must do lookup of tmp_entry */
6646
6647 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
6648 vm_map_unlock(dst_map);
6649 return(KERN_INVALID_ADDRESS);
6650 }
6651 vm_map_clip_start(dst_map, tmp_entry, start);
6652 }
6653 }
6654 }/* while */
6655
6656 return(KERN_SUCCESS);
6657 }/* vm_map_copy_overwrite_aligned */
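
/*
 * Illustrative sketch, not part of the original source, of the test the
 * aligned path above uses to decide whether it may simply install the
 * source entry's object in place of the destination's (assuming, as the
 * code above already guarantees, that the entry is not a submap): the
 * destination must be unshared and backed by nothing or by temporary
 * internal memory that is not "true_share", or still be marked
 * needs_copy.  The helper name is hypothetical.
 */
static boolean_t
example_can_install_source_object(
	vm_map_entry_t	entry)
{
	vm_object_t	object = entry->object.vm_object;

	if (entry->needs_copy)
		return TRUE;
	if (entry->is_shared)
		return FALSE;
	return (object == VM_OBJECT_NULL ||
		(object->internal && !object->true_share));
}
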
6658
6659 /*
6660 * Routine: vm_map_copyin_kernel_buffer [internal use only]
6661 *
6662 * Description:
6663 * Copy in data to a kernel buffer from space in the
6664 * source map. The original space may be optionally
6665 * deallocated.
6666 *
6667 * If successful, returns a new copy object.
6668 */
6669 static kern_return_t
6670 vm_map_copyin_kernel_buffer(
6671 vm_map_t src_map,
6672 vm_map_offset_t src_addr,
6673 vm_map_size_t len,
6674 boolean_t src_destroy,
6675 vm_map_copy_t *copy_result)
6676 {
6677 kern_return_t kr;
6678 vm_map_copy_t copy;
6679 vm_size_t kalloc_size;
6680
6681 if ((vm_size_t) len != len) {
6682 /* "len" is too big and doesn't fit in a "vm_size_t" */
6683 return KERN_RESOURCE_SHORTAGE;
6684 }
6685 kalloc_size = (vm_size_t) (sizeof(struct vm_map_copy) + len);
6686 assert((vm_map_size_t) kalloc_size == sizeof (struct vm_map_copy) + len);
6687
6688 copy = (vm_map_copy_t) kalloc(kalloc_size);
6689 if (copy == VM_MAP_COPY_NULL) {
6690 return KERN_RESOURCE_SHORTAGE;
6691 }
6692 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
6693 copy->size = len;
6694 copy->offset = 0;
6695 copy->cpy_kdata = (void *) (copy + 1);
6696 copy->cpy_kalloc_size = kalloc_size;
6697
6698 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t) len);
6699 if (kr != KERN_SUCCESS) {
6700 kfree(copy, kalloc_size);
6701 return kr;
6702 }
6703 if (src_destroy) {
6704 (void) vm_map_remove(src_map, vm_map_trunc_page(src_addr),
6705 vm_map_round_page(src_addr + len),
6706 VM_MAP_REMOVE_INTERRUPTIBLE |
6707 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
6708 (src_map == kernel_map) ?
6709 VM_MAP_REMOVE_KUNWIRE : 0);
6710 }
6711 *copy_result = copy;
6712 return KERN_SUCCESS;
6713 }
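
/*
 * Minimal sketch, not in the original source, of the sizing guard used by
 * vm_map_copyin_kernel_buffer() above: a 64-bit vm_map_size_t length must
 * survive truncation to vm_size_t before it is added to the header size
 * that will be handed to kalloc().  The helper name is hypothetical.
 */
static kern_return_t
example_kernel_buffer_alloc_size(
	vm_map_size_t	len,
	vm_size_t	*kalloc_size)	/* OUT */
{
	if ((vm_size_t) len != len) {
		/* "len" would be truncated; refuse, as the routine above does */
		return KERN_RESOURCE_SHORTAGE;
	}
	*kalloc_size = (vm_size_t) (sizeof(struct vm_map_copy) + len);
	return KERN_SUCCESS;
}
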
6714
6715 /*
6716 * Routine: vm_map_copyout_kernel_buffer [internal use only]
6717 *
6718 * Description:
6719 * Copy out data from a kernel buffer into space in the
6720 * destination map. The space may be optionally dynamically
6721 * allocated.
6722 *
6723 * If successful, consumes the copy object.
6724 * Otherwise, the caller is responsible for it.
6725 */
6726 static int vm_map_copyout_kernel_buffer_failures = 0;
6727 static kern_return_t
6728 vm_map_copyout_kernel_buffer(
6729 vm_map_t map,
6730 vm_map_address_t *addr, /* IN/OUT */
6731 vm_map_copy_t copy,
6732 boolean_t overwrite)
6733 {
6734 kern_return_t kr = KERN_SUCCESS;
6735 thread_t thread = current_thread();
6736
6737 if (!overwrite) {
6738
6739 /*
6740 * Allocate space in the target map for the data
6741 */
6742 *addr = 0;
6743 kr = vm_map_enter(map,
6744 addr,
6745 vm_map_round_page(copy->size),
6746 (vm_map_offset_t) 0,
6747 VM_FLAGS_ANYWHERE,
6748 VM_OBJECT_NULL,
6749 (vm_object_offset_t) 0,
6750 FALSE,
6751 VM_PROT_DEFAULT,
6752 VM_PROT_ALL,
6753 VM_INHERIT_DEFAULT);
6754 if (kr != KERN_SUCCESS)
6755 return kr;
6756 }
6757
6758 /*
6759 * Copyout the data from the kernel buffer to the target map.
6760 */
6761 if (thread->map == map) {
6762
6763 /*
6764 * If the target map is the current map, just do
6765 * the copy.
6766 */
6767 assert((vm_size_t) copy->size == copy->size);
6768 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
6769 kr = KERN_INVALID_ADDRESS;
6770 }
6771 }
6772 else {
6773 vm_map_t oldmap;
6774
6775 /*
6776 * If the target map is another map, assume the
6777 * target's address space identity for the duration
6778 * of the copy.
6779 */
6780 vm_map_reference(map);
6781 oldmap = vm_map_switch(map);
6782
6783 assert((vm_size_t) copy->size == copy->size);
6784 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
6785 vm_map_copyout_kernel_buffer_failures++;
6786 kr = KERN_INVALID_ADDRESS;
6787 }
6788
6789 (void) vm_map_switch(oldmap);
6790 vm_map_deallocate(map);
6791 }
6792
6793 if (kr != KERN_SUCCESS) {
6794 /* the copy failed, clean up */
6795 if (!overwrite) {
6796 /*
6797 * Deallocate the space we allocated in the target map.
6798 */
6799 (void) vm_map_remove(map,
6800 vm_map_trunc_page(*addr),
6801 vm_map_round_page(*addr +
6802 vm_map_round_page(copy->size)),
6803 VM_MAP_NO_FLAGS);
6804 *addr = 0;
6805 }
6806 } else {
6807 /* copy was successful, discard the copy structure */
6808 kfree(copy, copy->cpy_kalloc_size);
6809 }
6810
6811 return kr;
6812 }
6813
6814 /*
6815 * Macro: vm_map_copy_insert
6816 *
6817 * Description:
6818 * Link a copy chain ("copy") into a map at the
6819 * specified location (after "where").
6820 * Side effects:
6821 * The copy chain is destroyed.
6822 * Warning:
6823 * The arguments are evaluated multiple times.
6824 */
6825 #define vm_map_copy_insert(map, where, copy) \
6826 MACRO_BEGIN \
6827 vm_map_t VMCI_map; \
6828 vm_map_entry_t VMCI_where; \
6829 vm_map_copy_t VMCI_copy; \
6830 VMCI_map = (map); \
6831 VMCI_where = (where); \
6832 VMCI_copy = (copy); \
6833 ((VMCI_where->vme_next)->vme_prev = vm_map_copy_last_entry(VMCI_copy))\
6834 ->vme_next = (VMCI_where->vme_next); \
6835 ((VMCI_where)->vme_next = vm_map_copy_first_entry(VMCI_copy)) \
6836 ->vme_prev = VMCI_where; \
6837 VMCI_map->hdr.nentries += VMCI_copy->cpy_hdr.nentries; \
6838 UPDATE_FIRST_FREE(VMCI_map, VMCI_map->first_free); \
6839 zfree(vm_map_copy_zone, VMCI_copy); \
6840 MACRO_END
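
/*
 * Simplified sketch, not part of the original source, of the pointer
 * surgery vm_map_copy_insert() performs, written against a hypothetical
 * node type so the splice is easier to read: the whole chain
 * [first .. last] is linked in immediately after "where".
 */
struct example_node {
	struct example_node	*next;
	struct example_node	*prev;
};

static void
example_splice_chain_after(
	struct example_node	*where,
	struct example_node	*first,		/* head of the chain */
	struct example_node	*last)		/* tail of the chain */
{
	/* the tail of the chain picks up "where"'s old successor */
	last->next = where->next;
	where->next->prev = last;

	/* the head of the chain hangs off "where" itself */
	where->next = first;
	first->prev = where;
}
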
6841
6842 /*
6843 * Routine: vm_map_copyout
6844 *
6845 * Description:
6846 * Copy out a copy chain ("copy") into newly-allocated
6847 * space in the destination map.
6848 *
6849 * If successful, consumes the copy object.
6850 * Otherwise, the caller is responsible for it.
6851 */
6852 kern_return_t
6853 vm_map_copyout(
6854 vm_map_t dst_map,
6855 vm_map_address_t *dst_addr, /* OUT */
6856 vm_map_copy_t copy)
6857 {
6858 vm_map_size_t size;
6859 vm_map_size_t adjustment;
6860 vm_map_offset_t start;
6861 vm_object_offset_t vm_copy_start;
6862 vm_map_entry_t last;
6863 register
6864 vm_map_entry_t entry;
6865
6866 /*
6867 * Check for null copy object.
6868 */
6869
6870 if (copy == VM_MAP_COPY_NULL) {
6871 *dst_addr = 0;
6872 return(KERN_SUCCESS);
6873 }
6874
6875 /*
6876 * Check for special copy object, created
6877 * by vm_map_copyin_object.
6878 */
6879
6880 if (copy->type == VM_MAP_COPY_OBJECT) {
6881 vm_object_t object = copy->cpy_object;
6882 kern_return_t kr;
6883 vm_object_offset_t offset;
6884
6885 offset = vm_object_trunc_page(copy->offset);
6886 size = vm_map_round_page(copy->size +
6887 (vm_map_size_t)(copy->offset - offset));
6888 *dst_addr = 0;
6889 kr = vm_map_enter(dst_map, dst_addr, size,
6890 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
6891 object, offset, FALSE,
6892 VM_PROT_DEFAULT, VM_PROT_ALL,
6893 VM_INHERIT_DEFAULT);
6894 if (kr != KERN_SUCCESS)
6895 return(kr);
6896 /* Account for non-pagealigned copy object */
6897 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
6898 zfree(vm_map_copy_zone, copy);
6899 return(KERN_SUCCESS);
6900 }
6901
6902 /*
6903 * Check for special kernel buffer allocated
6904 * by new_ipc_kmsg_copyin.
6905 */
6906
6907 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
6908 return(vm_map_copyout_kernel_buffer(dst_map, dst_addr,
6909 copy, FALSE));
6910 }
6911
6912 /*
6913 * Find space for the data
6914 */
6915
6916 vm_copy_start = vm_object_trunc_page(copy->offset);
6917 size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size)
6918 - vm_copy_start;
6919
6920 StartAgain: ;
6921
6922 vm_map_lock(dst_map);
6923 assert(first_free_is_valid(dst_map));
6924 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
6925 vm_map_min(dst_map) : last->vme_end;
6926
6927 while (TRUE) {
6928 vm_map_entry_t next = last->vme_next;
6929 vm_map_offset_t end = start + size;
6930
6931 if ((end > dst_map->max_offset) || (end < start)) {
6932 if (dst_map->wait_for_space) {
6933 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
6934 assert_wait((event_t) dst_map,
6935 THREAD_INTERRUPTIBLE);
6936 vm_map_unlock(dst_map);
6937 thread_block(THREAD_CONTINUE_NULL);
6938 goto StartAgain;
6939 }
6940 }
6941 vm_map_unlock(dst_map);
6942 return(KERN_NO_SPACE);
6943 }
6944
6945 if ((next == vm_map_to_entry(dst_map)) ||
6946 (next->vme_start >= end))
6947 break;
6948
6949 last = next;
6950 start = last->vme_end;
6951 }
6952
6953 /*
6954 * Since we're going to just drop the map
6955 * entries from the copy into the destination
6956 * map, they must come from the same pool.
6957 */
6958
6959 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
6960 /*
6961 * Mismatches occur when dealing with the default
6962 * pager.
6963 */
6964 zone_t old_zone;
6965 vm_map_entry_t next, new;
6966
6967 /*
6968 * Find the zone that the copies were allocated from
6969 */
6970 old_zone = (copy->cpy_hdr.entries_pageable)
6971 ? vm_map_entry_zone
6972 : vm_map_kentry_zone;
6973 entry = vm_map_copy_first_entry(copy);
6974
6975 /*
6976 * Reinitialize the copy so that vm_map_copy_entry_link
6977 * will work.
6978 */
6979 copy->cpy_hdr.nentries = 0;
6980 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
6981 vm_map_copy_first_entry(copy) =
6982 vm_map_copy_last_entry(copy) =
6983 vm_map_copy_to_entry(copy);
6984
6985 /*
6986 * Copy each entry.
6987 */
6988 while (entry != vm_map_copy_to_entry(copy)) {
6989 new = vm_map_copy_entry_create(copy);
6990 vm_map_entry_copy_full(new, entry);
6991 new->use_pmap = FALSE; /* clr address space specifics */
6992 vm_map_copy_entry_link(copy,
6993 vm_map_copy_last_entry(copy),
6994 new);
6995 next = entry->vme_next;
6996 zfree(old_zone, entry);
6997 entry = next;
6998 }
6999 }
7000
7001 /*
7002 * Adjust the addresses in the copy chain, and
7003 * reset the region attributes.
7004 */
7005
7006 adjustment = start - vm_copy_start;
7007 for (entry = vm_map_copy_first_entry(copy);
7008 entry != vm_map_copy_to_entry(copy);
7009 entry = entry->vme_next) {
7010 entry->vme_start += adjustment;
7011 entry->vme_end += adjustment;
7012
7013 entry->inheritance = VM_INHERIT_DEFAULT;
7014 entry->protection = VM_PROT_DEFAULT;
7015 entry->max_protection = VM_PROT_ALL;
7016 entry->behavior = VM_BEHAVIOR_DEFAULT;
7017
7018 /*
7019 * If the entry is now wired,
7020 * map the pages into the destination map.
7021 */
7022 if (entry->wired_count != 0) {
7023 register vm_map_offset_t va;
7024 vm_object_offset_t offset;
7025 register vm_object_t object;
7026 vm_prot_t prot;
7027 int type_of_fault;
7028
7029 object = entry->object.vm_object;
7030 offset = entry->offset;
7031 va = entry->vme_start;
7032
7033 pmap_pageable(dst_map->pmap,
7034 entry->vme_start,
7035 entry->vme_end,
7036 TRUE);
7037
7038 while (va < entry->vme_end) {
7039 register vm_page_t m;
7040
7041 /*
7042 * Look up the page in the object.
7043 * Assert that the page will be found in the
7044 * top object:
7045 * either
7046 * the object was newly created by
7047 * vm_object_copy_slowly, and has
7048 * copies of all of the pages from
7049 * the source object
7050 * or
7051 * the object was moved from the old
7052 * map entry; because the old map
7053 * entry was wired, all of the pages
7054 * were in the top-level object.
7055 * (XXX not true if we wire pages for
7056 * reading)
7057 */
7058 vm_object_lock(object);
7059
7060 m = vm_page_lookup(object, offset);
7061 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
7062 m->absent)
7063 panic("vm_map_copyout: wiring %p", m);
7064
7065 /*
7066 * ENCRYPTED SWAP:
7067 * The page is assumed to be wired here, so it
7068 * shouldn't be encrypted. Otherwise, we
7069 * couldn't enter it in the page table, since
7070 * we don't want the user to see the encrypted
7071 * data.
7072 */
7073 ASSERT_PAGE_DECRYPTED(m);
7074
7075 prot = entry->protection;
7076
7077 if (override_nx(dst_map, entry->alias) && prot)
7078 prot |= VM_PROT_EXECUTE;
7079
7080 type_of_fault = DBG_CACHE_HIT_FAULT;
7081
7082 vm_fault_enter(m, dst_map->pmap, va, prot,
7083 VM_PAGE_WIRED(m), FALSE, FALSE,
7084 &type_of_fault);
7085
7086 vm_object_unlock(object);
7087
7088 offset += PAGE_SIZE_64;
7089 va += PAGE_SIZE;
7090 }
7091 }
7092 }
7093
7094 /*
7095 * Correct the page alignment for the result
7096 */
7097
7098 *dst_addr = start + (copy->offset - vm_copy_start);
7099
7100 /*
7101 * Update the hints and the map size
7102 */
7103
7104 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
7105
7106 dst_map->size += size;
7107
7108 /*
7109 * Link in the copy
7110 */
7111
7112 vm_map_copy_insert(dst_map, last, copy);
7113
7114 vm_map_unlock(dst_map);
7115
7116 /*
7117 * XXX If wiring_required, call vm_map_pageable
7118 */
7119
7120 return(KERN_SUCCESS);
7121 }
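
/*
 * Illustrative sketch, not part of the original source: the classic
 * out-of-line transfer built from the routines above, capturing a range
 * of one map with vm_map_copyin() and materializing it at a newly
 * allocated address in another map with vm_map_copyout().  The helper
 * name is hypothetical; on success the copy object is consumed by
 * vm_map_copyout(), on failure it is still the caller's to discard.
 */
static kern_return_t
example_transfer_range(
	vm_map_t		src_map,
	vm_map_address_t	src_addr,
	vm_map_size_t		len,
	vm_map_t		dst_map,
	vm_map_address_t	*dst_addr)	/* OUT */
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	kr = vm_map_copyout(dst_map, dst_addr, copy);
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(copy);
	return kr;
}
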
7122
7123 /*
7124 * Routine: vm_map_copyin
7125 *
7126 * Description:
7127 * see vm_map_copyin_common. Exported via Unsupported.exports.
7128 *
7129 */
7130
7131 #undef vm_map_copyin
7132
7133 kern_return_t
7134 vm_map_copyin(
7135 vm_map_t src_map,
7136 vm_map_address_t src_addr,
7137 vm_map_size_t len,
7138 boolean_t src_destroy,
7139 vm_map_copy_t *copy_result) /* OUT */
7140 {
7141 return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
7142 FALSE, copy_result, FALSE));
7143 }
7144
7145 /*
7146 * Routine: vm_map_copyin_common
7147 *
7148 * Description:
7149 * Copy the specified region (src_addr, len) from the
7150 * source address space (src_map), possibly removing
7151 * the region from the source address space (src_destroy).
7152 *
7153 * Returns:
7154 * A vm_map_copy_t object (copy_result), suitable for
7155 * insertion into another address space (using vm_map_copyout),
7156 * copying over another address space region (using
7157 * vm_map_copy_overwrite). If the copy is unused, it
7158 * should be destroyed (using vm_map_copy_discard).
7159 *
7160 * In/out conditions:
7161 * The source map should not be locked on entry.
7162 */
7163
7164 typedef struct submap_map {
7165 vm_map_t parent_map;
7166 vm_map_offset_t base_start;
7167 vm_map_offset_t base_end;
7168 vm_map_size_t base_len;
7169 struct submap_map *next;
7170 } submap_map_t;
7171
7172 kern_return_t
7173 vm_map_copyin_common(
7174 vm_map_t src_map,
7175 vm_map_address_t src_addr,
7176 vm_map_size_t len,
7177 boolean_t src_destroy,
7178 __unused boolean_t src_volatile,
7179 vm_map_copy_t *copy_result, /* OUT */
7180 boolean_t use_maxprot)
7181 {
7182 vm_map_entry_t tmp_entry; /* Result of last map lookup --
7183 * in multi-level lookup, this
7184 * entry contains the actual
7185 * vm_object/offset.
7186 */
7187 register
7188 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
7189
7190 vm_map_offset_t src_start; /* Start of current entry --
7191 * where copy is taking place now
7192 */
7193 vm_map_offset_t src_end; /* End of entire region to be
7194 * copied */
7195 vm_map_offset_t src_base;
7196 vm_map_t base_map = src_map;
7197 boolean_t map_share=FALSE;
7198 submap_map_t *parent_maps = NULL;
7199
7200 register
7201 vm_map_copy_t copy; /* Resulting copy */
7202 vm_map_address_t copy_addr;
7203
7204 /*
7205 * Check for copies of zero bytes.
7206 */
7207
7208 if (len == 0) {
7209 *copy_result = VM_MAP_COPY_NULL;
7210 return(KERN_SUCCESS);
7211 }
7212
7213 /*
7214 * Check that the end address doesn't overflow
7215 */
7216 src_end = src_addr + len;
7217 if (src_end < src_addr)
7218 return KERN_INVALID_ADDRESS;
7219
7220 /*
7221 * If the copy is sufficiently small, use a kernel buffer instead
7222 * of making a virtual copy. The theory being that the cost of
7223 * setting up VM (and taking C-O-W faults) dominates the copy costs
7224 * for small regions.
7225 */
7226 if ((len < msg_ool_size_small) && !use_maxprot)
7227 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
7228 src_destroy, copy_result);
7229
7230 /*
7231 * Compute (page aligned) start and end of region
7232 */
7233 src_start = vm_map_trunc_page(src_addr);
7234 src_end = vm_map_round_page(src_end);
7235
7236 XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);
7237
7238 /*
7239 * Allocate a header element for the list.
7240 *
7241 * Use the start and end in the header to
7242 * remember the endpoints prior to rounding.
7243 */
7244
7245 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7246 vm_map_copy_first_entry(copy) =
7247 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
7248 copy->type = VM_MAP_COPY_ENTRY_LIST;
7249 copy->cpy_hdr.nentries = 0;
7250 copy->cpy_hdr.entries_pageable = TRUE;
7251
7252 copy->offset = src_addr;
7253 copy->size = len;
7254
7255 new_entry = vm_map_copy_entry_create(copy);
7256
7257 #define RETURN(x) \
7258 MACRO_BEGIN \
7259 vm_map_unlock(src_map); \
7260 if(src_map != base_map) \
7261 vm_map_deallocate(src_map); \
7262 if (new_entry != VM_MAP_ENTRY_NULL) \
7263 vm_map_copy_entry_dispose(copy,new_entry); \
7264 vm_map_copy_discard(copy); \
7265 { \
7266 submap_map_t *_ptr; \
7267 \
7268 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
7269 parent_maps=parent_maps->next; \
7270 if (_ptr->parent_map != base_map) \
7271 vm_map_deallocate(_ptr->parent_map); \
7272 kfree(_ptr, sizeof(submap_map_t)); \
7273 } \
7274 } \
7275 MACRO_RETURN(x); \
7276 MACRO_END
7277
7278 /*
7279 * Find the beginning of the region.
7280 */
7281
7282 vm_map_lock(src_map);
7283
7284 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry))
7285 RETURN(KERN_INVALID_ADDRESS);
7286 if(!tmp_entry->is_sub_map) {
7287 vm_map_clip_start(src_map, tmp_entry, src_start);
7288 }
7289 /* set for later submap fix-up */
7290 copy_addr = src_start;
7291
7292 /*
7293 * Go through entries until we get to the end.
7294 */
7295
7296 while (TRUE) {
7297 register
7298 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
7299 vm_map_size_t src_size; /* Size of source
7300 * map entry (in both
7301 * maps)
7302 */
7303
7304 register
7305 vm_object_t src_object; /* Object to copy */
7306 vm_object_offset_t src_offset;
7307
7308 boolean_t src_needs_copy; /* Should source map
7309 * be made read-only
7310 * for copy-on-write?
7311 */
7312
7313 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
7314
7315 boolean_t was_wired; /* Was source wired? */
7316 vm_map_version_t version; /* Version before locks
7317 * dropped to make copy
7318 */
7319 kern_return_t result; /* Return value from
7320 * copy_strategically.
7321 */
7322 while(tmp_entry->is_sub_map) {
7323 vm_map_size_t submap_len;
7324 submap_map_t *ptr;
7325
7326 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
7327 ptr->next = parent_maps;
7328 parent_maps = ptr;
7329 ptr->parent_map = src_map;
7330 ptr->base_start = src_start;
7331 ptr->base_end = src_end;
7332 submap_len = tmp_entry->vme_end - src_start;
7333 if(submap_len > (src_end-src_start))
7334 submap_len = src_end-src_start;
7335 ptr->base_len = submap_len;
7336
7337 src_start -= tmp_entry->vme_start;
7338 src_start += tmp_entry->offset;
7339 src_end = src_start + submap_len;
7340 src_map = tmp_entry->object.sub_map;
7341 vm_map_lock(src_map);
7342 /* keep an outstanding reference for all maps in */
7343 /* the parents tree except the base map */
7344 vm_map_reference(src_map);
7345 vm_map_unlock(ptr->parent_map);
7346 if (!vm_map_lookup_entry(
7347 src_map, src_start, &tmp_entry))
7348 RETURN(KERN_INVALID_ADDRESS);
7349 map_share = TRUE;
7350 if(!tmp_entry->is_sub_map)
7351 vm_map_clip_start(src_map, tmp_entry, src_start);
7352 src_entry = tmp_entry;
7353 }
7354 /* we are now in the lowest level submap... */
7355
7356 if ((tmp_entry->object.vm_object != VM_OBJECT_NULL) &&
7357 (tmp_entry->object.vm_object->phys_contiguous)) {
7358 /* This is not supported for now. In the future */
7359 /* we will need to detect the phys_contig */
7360 /* condition and then upgrade copy_slowly */
7361 /* to do physical copy from the device mem */
7362 /* based object. We can piggy-back off of */
7363 /* the was_wired boolean to set up the */
7364 /* proper handling */
7365 RETURN(KERN_PROTECTION_FAILURE);
7366 }
7367 /*
7368 * Create a new address map entry to hold the result.
7369 * Fill in the fields from the appropriate source entries.
7370 * We must unlock the source map to do this if we need
7371 * to allocate a map entry.
7372 */
7373 if (new_entry == VM_MAP_ENTRY_NULL) {
7374 version.main_timestamp = src_map->timestamp;
7375 vm_map_unlock(src_map);
7376
7377 new_entry = vm_map_copy_entry_create(copy);
7378
7379 vm_map_lock(src_map);
7380 if ((version.main_timestamp + 1) != src_map->timestamp) {
7381 if (!vm_map_lookup_entry(src_map, src_start,
7382 &tmp_entry)) {
7383 RETURN(KERN_INVALID_ADDRESS);
7384 }
7385 if (!tmp_entry->is_sub_map)
7386 vm_map_clip_start(src_map, tmp_entry, src_start);
7387 continue; /* restart w/ new tmp_entry */
7388 }
7389 }
7390
7391 /*
7392 * Verify that the region can be read.
7393 */
7394 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
7395 !use_maxprot) ||
7396 (src_entry->max_protection & VM_PROT_READ) == 0)
7397 RETURN(KERN_PROTECTION_FAILURE);
7398
7399 /*
7400 * Clip against the endpoints of the entire region.
7401 */
7402
7403 vm_map_clip_end(src_map, src_entry, src_end);
7404
7405 src_size = src_entry->vme_end - src_start;
7406 src_object = src_entry->object.vm_object;
7407 src_offset = src_entry->offset;
7408 was_wired = (src_entry->wired_count != 0);
7409
7410 vm_map_entry_copy(new_entry, src_entry);
7411 new_entry->use_pmap = FALSE; /* clr address space specifics */
7412
7413 /*
7414 * Attempt non-blocking copy-on-write optimizations.
7415 */
7416
7417 if (src_destroy &&
7418 (src_object == VM_OBJECT_NULL ||
7419 (src_object->internal && !src_object->true_share
7420 && !map_share))) {
7421 /*
7422 * If we are destroying the source, and the object
7423 * is internal, we can move the object reference
7424 * from the source to the copy. The copy is
7425 * copy-on-write only if the source is.
7426 * We make another reference to the object, because
7427 * destroying the source entry will deallocate it.
7428 */
7429 vm_object_reference(src_object);
7430
7431 /*
7432 * Copy is always unwired. vm_map_copy_entry
7433 * set its wired count to zero.
7434 */
7435
7436 goto CopySuccessful;
7437 }
7438
7439
7440 RestartCopy:
7441 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
7442 src_object, new_entry, new_entry->object.vm_object,
7443 was_wired, 0);
7444 if ((src_object == VM_OBJECT_NULL ||
7445 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
7446 vm_object_copy_quickly(
7447 &new_entry->object.vm_object,
7448 src_offset,
7449 src_size,
7450 &src_needs_copy,
7451 &new_entry_needs_copy)) {
7452
7453 new_entry->needs_copy = new_entry_needs_copy;
7454
7455 /*
7456 * Handle copy-on-write obligations
7457 */
7458
7459 if (src_needs_copy && !tmp_entry->needs_copy) {
7460 vm_prot_t prot;
7461
7462 prot = src_entry->protection & ~VM_PROT_WRITE;
7463
7464 if (override_nx(src_map, src_entry->alias) && prot)
7465 prot |= VM_PROT_EXECUTE;
7466
7467 vm_object_pmap_protect(
7468 src_object,
7469 src_offset,
7470 src_size,
7471 (src_entry->is_shared ?
7472 PMAP_NULL
7473 : src_map->pmap),
7474 src_entry->vme_start,
7475 prot);
7476
7477 tmp_entry->needs_copy = TRUE;
7478 }
7479
7480 /*
7481 * The map has never been unlocked, so it's safe
7482 * to move to the next entry rather than doing
7483 * another lookup.
7484 */
7485
7486 goto CopySuccessful;
7487 }
7488
7489 /*
7490 * Take an object reference, so that we may
7491 * release the map lock(s).
7492 */
7493
7494 assert(src_object != VM_OBJECT_NULL);
7495 vm_object_reference(src_object);
7496
7497 /*
7498 * Record the timestamp for later verification.
7499 * Unlock the map.
7500 */
7501
7502 version.main_timestamp = src_map->timestamp;
7503 vm_map_unlock(src_map); /* Increments timestamp once! */
7504
7505 /*
7506 * Perform the copy
7507 */
7508
7509 if (was_wired) {
7510 CopySlowly:
7511 vm_object_lock(src_object);
7512 result = vm_object_copy_slowly(
7513 src_object,
7514 src_offset,
7515 src_size,
7516 THREAD_UNINT,
7517 &new_entry->object.vm_object);
7518 new_entry->offset = 0;
7519 new_entry->needs_copy = FALSE;
7520
7521 }
7522 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
7523 (tmp_entry->is_shared || map_share)) {
7524 vm_object_t new_object;
7525
7526 vm_object_lock_shared(src_object);
7527 new_object = vm_object_copy_delayed(
7528 src_object,
7529 src_offset,
7530 src_size,
7531 TRUE);
7532 if (new_object == VM_OBJECT_NULL)
7533 goto CopySlowly;
7534
7535 new_entry->object.vm_object = new_object;
7536 new_entry->needs_copy = TRUE;
7537 result = KERN_SUCCESS;
7538
7539 } else {
7540 result = vm_object_copy_strategically(src_object,
7541 src_offset,
7542 src_size,
7543 &new_entry->object.vm_object,
7544 &new_entry->offset,
7545 &new_entry_needs_copy);
7546
7547 new_entry->needs_copy = new_entry_needs_copy;
7548 }
7549
7550 if (result != KERN_SUCCESS &&
7551 result != KERN_MEMORY_RESTART_COPY) {
7552 vm_map_lock(src_map);
7553 RETURN(result);
7554 }
7555
7556 /*
7557 * Throw away the extra reference
7558 */
7559
7560 vm_object_deallocate(src_object);
7561
7562 /*
7563 * Verify that the map has not substantially
7564 * changed while the copy was being made.
7565 */
7566
7567 vm_map_lock(src_map);
7568
7569 if ((version.main_timestamp + 1) == src_map->timestamp)
7570 goto VerificationSuccessful;
7571
7572 /*
7573 * Simple version comparison failed.
7574 *
7575 * Retry the lookup and verify that the
7576 * same object/offset are still present.
7577 *
7578 * [Note: a memory manager that colludes with
7579 * the calling task can detect that we have
7580 * cheated. While the map was unlocked, the
7581 * mapping could have been changed and restored.]
7582 */
7583
7584 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
7585 RETURN(KERN_INVALID_ADDRESS);
7586 }
7587
7588 src_entry = tmp_entry;
7589 vm_map_clip_start(src_map, src_entry, src_start);
7590
7591 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
7592 !use_maxprot) ||
7593 ((src_entry->max_protection & VM_PROT_READ) == 0))
7594 goto VerificationFailed;
7595
7596 if (src_entry->vme_end < new_entry->vme_end)
7597 src_size = (new_entry->vme_end = src_entry->vme_end) - src_start;
7598
7599 if ((src_entry->object.vm_object != src_object) ||
7600 (src_entry->offset != src_offset) ) {
7601
7602 /*
7603 * Verification failed.
7604 *
7605 * Start over with this top-level entry.
7606 */
7607
7608 VerificationFailed: ;
7609
7610 vm_object_deallocate(new_entry->object.vm_object);
7611 tmp_entry = src_entry;
7612 continue;
7613 }
7614
7615 /*
7616 * Verification succeeded.
7617 */
7618
7619 VerificationSuccessful: ;
7620
7621 if (result == KERN_MEMORY_RESTART_COPY)
7622 goto RestartCopy;
7623
7624 /*
7625 * Copy succeeded.
7626 */
7627
7628 CopySuccessful: ;
7629
7630 /*
7631 * Link in the new copy entry.
7632 */
7633
7634 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
7635 new_entry);
7636
7637 /*
7638 * Determine whether the entire region
7639 * has been copied.
7640 */
7641 src_base = src_start;
7642 src_start = new_entry->vme_end;
7643 new_entry = VM_MAP_ENTRY_NULL;
7644 while ((src_start >= src_end) && (src_end != 0)) {
7645 if (src_map != base_map) {
7646 submap_map_t *ptr;
7647
7648 ptr = parent_maps;
7649 assert(ptr != NULL);
7650 parent_maps = parent_maps->next;
7651
7652 /* fix up the damage we did in that submap */
7653 vm_map_simplify_range(src_map,
7654 src_base,
7655 src_end);
7656
7657 vm_map_unlock(src_map);
7658 vm_map_deallocate(src_map);
7659 vm_map_lock(ptr->parent_map);
7660 src_map = ptr->parent_map;
7661 src_base = ptr->base_start;
7662 src_start = ptr->base_start + ptr->base_len;
7663 src_end = ptr->base_end;
7664 if ((src_end > src_start) &&
7665 !vm_map_lookup_entry(
7666 src_map, src_start, &tmp_entry))
7667 RETURN(KERN_INVALID_ADDRESS);
7668 kfree(ptr, sizeof(submap_map_t));
7669 if(parent_maps == NULL)
7670 map_share = FALSE;
7671 src_entry = tmp_entry->vme_prev;
7672 } else
7673 break;
7674 }
7675 if ((src_start >= src_end) && (src_end != 0))
7676 break;
7677
7678 /*
7679 * Verify that there are no gaps in the region
7680 */
7681
7682 tmp_entry = src_entry->vme_next;
7683 if ((tmp_entry->vme_start != src_start) ||
7684 (tmp_entry == vm_map_to_entry(src_map)))
7685 RETURN(KERN_INVALID_ADDRESS);
7686 }
7687
7688 /*
7689 * If the source should be destroyed, do it now, since the
7690 * copy was successful.
7691 */
7692 if (src_destroy) {
7693 (void) vm_map_delete(src_map,
7694 vm_map_trunc_page(src_addr),
7695 src_end,
7696 (src_map == kernel_map) ?
7697 VM_MAP_REMOVE_KUNWIRE :
7698 VM_MAP_NO_FLAGS,
7699 VM_MAP_NULL);
7700 } else {
7701 /* fix up the damage we did in the base map */
7702 vm_map_simplify_range(src_map,
7703 vm_map_trunc_page(src_addr),
7704 vm_map_round_page(src_end));
7705 }
7706
7707 vm_map_unlock(src_map);
7708
7709 /* Fix-up start and end points in copy. This is necessary */
7710 /* when the various entries in the copy object were picked */
7711 /* up from different sub-maps */
7712
7713 tmp_entry = vm_map_copy_first_entry(copy);
7714 while (tmp_entry != vm_map_copy_to_entry(copy)) {
7715 tmp_entry->vme_end = copy_addr +
7716 (tmp_entry->vme_end - tmp_entry->vme_start);
7717 tmp_entry->vme_start = copy_addr;
7718 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
7719 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
7720 }
7721
7722 *copy_result = copy;
7723 return(KERN_SUCCESS);
7724
7725 #undef RETURN
7726 }
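
/*
 * Illustrative sketch, not part of the original source: a "maxprot"
 * flavor of copyin expressed directly in terms of vm_map_copyin_common(),
 * mirroring how the vm_map_copyin() wrapper above passes use_maxprot as
 * FALSE.  That vm_map_copyin_maxprot(), used by vm_map_fork_copy() below,
 * reduces to exactly this call is an assumption; the helper name is
 * hypothetical.
 */
static kern_return_t
example_copyin_maxprot(
	vm_map_t		src_map,
	vm_map_address_t	src_addr,
	vm_map_size_t		len,
	boolean_t		src_destroy,
	vm_map_copy_t		*copy_result)	/* OUT */
{
	return vm_map_copyin_common(src_map, src_addr, len, src_destroy,
				    FALSE, copy_result, TRUE);
}
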
7727
7728 /*
7729 * vm_map_copyin_object:
7730 *
7731 * Create a copy object from an object.
7732 * Our caller donates an object reference.
7733 */
7734
7735 kern_return_t
7736 vm_map_copyin_object(
7737 vm_object_t object,
7738 vm_object_offset_t offset, /* offset of region in object */
7739 vm_object_size_t size, /* size of region in object */
7740 vm_map_copy_t *copy_result) /* OUT */
7741 {
7742 vm_map_copy_t copy; /* Resulting copy */
7743
7744 /*
7745 * We drop the object into a special copy object
7746 * that contains the object directly.
7747 */
7748
7749 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7750 copy->type = VM_MAP_COPY_OBJECT;
7751 copy->cpy_object = object;
7752 copy->offset = offset;
7753 copy->size = size;
7754
7755 *copy_result = copy;
7756 return(KERN_SUCCESS);
7757 }
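
/*
 * Illustrative sketch, not part of the original source: donating a
 * freshly allocated VM object to vm_map_copyin_object() above and then
 * mapping the result with vm_map_copyout().  vm_object_allocate() is the
 * allocator used elsewhere in this file; the helper name is hypothetical,
 * and the failure handling assumes the copy object is not consumed on a
 * failed copyout, as documented above.
 */
static kern_return_t
example_map_new_object(
	vm_map_t		dst_map,
	vm_object_size_t	size,
	vm_map_address_t	*dst_addr)	/* OUT */
{
	vm_object_t	object;
	vm_map_copy_t	copy;
	kern_return_t	kr;

	object = vm_object_allocate((vm_map_size_t) size);

	/* our object reference is donated to the copy object */
	kr = vm_map_copyin_object(object, 0, size, &copy);
	if (kr != KERN_SUCCESS) {
		vm_object_deallocate(object);
		return kr;
	}

	kr = vm_map_copyout(dst_map, dst_addr, copy);
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(copy);
	return kr;
}
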
7758
7759 static void
7760 vm_map_fork_share(
7761 vm_map_t old_map,
7762 vm_map_entry_t old_entry,
7763 vm_map_t new_map)
7764 {
7765 vm_object_t object;
7766 vm_map_entry_t new_entry;
7767
7768 /*
7769 * New sharing code. New map entry
7770 * references original object. Internal
7771 * objects use asynchronous copy algorithm for
7772 * future copies. First make sure we have
7773 * the right object. If we need a shadow,
7774 * or someone else already has one, then
7775 * make a new shadow and share it.
7776 */
7777
7778 object = old_entry->object.vm_object;
7779 if (old_entry->is_sub_map) {
7780 assert(old_entry->wired_count == 0);
7781 #ifndef NO_NESTED_PMAP
7782 if(old_entry->use_pmap) {
7783 kern_return_t result;
7784
7785 result = pmap_nest(new_map->pmap,
7786 (old_entry->object.sub_map)->pmap,
7787 (addr64_t)old_entry->vme_start,
7788 (addr64_t)old_entry->vme_start,
7789 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
7790 if(result)
7791 panic("vm_map_fork_share: pmap_nest failed!");
7792 }
7793 #endif /* NO_NESTED_PMAP */
7794 } else if (object == VM_OBJECT_NULL) {
7795 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
7796 old_entry->vme_start));
7797 old_entry->offset = 0;
7798 old_entry->object.vm_object = object;
7799 assert(!old_entry->needs_copy);
7800 } else if (object->copy_strategy !=
7801 MEMORY_OBJECT_COPY_SYMMETRIC) {
7802
7803 /*
7804 * We are already using an asymmetric
7805 * copy, and therefore we already have
7806 * the right object.
7807 */
7808
7809 assert(! old_entry->needs_copy);
7810 }
7811 else if (old_entry->needs_copy || /* case 1 */
7812 object->shadowed || /* case 2 */
7813 (!object->true_share && /* case 3 */
7814 !old_entry->is_shared &&
7815 (object->size >
7816 (vm_map_size_t)(old_entry->vme_end -
7817 old_entry->vme_start)))) {
7818
7819 /*
7820 * We need to create a shadow.
7821 * There are three cases here.
7822 * In the first case, we need to
7823 * complete a deferred symmetrical
7824 * copy that we participated in.
7825 * In the second and third cases,
7826 * we need to create the shadow so
7827 * that changes that we make to the
7828 * object do not interfere with
7829 * any symmetrical copies which
7830 * have occurred (case 2) or which
7831 * might occur (case 3).
7832 *
7833 * The first case is when we had
7834 * deferred shadow object creation
7835 * via the entry->needs_copy mechanism.
7836 * This mechanism only works when
7837 * only one entry points to the source
7838 * object, and we are about to create
7839 * a second entry pointing to the
7840 * same object. The problem is that
7841 * there is no way of mapping from
7842 * an object to the entries pointing
7843 * to it. (Deferred shadow creation
7844 * works with one entry because it occurs
7845 * at fault time, and we walk from the
7846 * entry to the object when handling
7847 * the fault.)
7848 *
7849 * The second case is when the object
7850 * to be shared has already been copied
7851 * with a symmetric copy, but we point
7852 * directly to the object without
7853 * needs_copy set in our entry. (This
7854 * can happen because different ranges
7855 * of an object can be pointed to by
7856 * different entries. In particular,
7857 * a single entry pointing to an object
7858 * can be split by a call to vm_inherit,
7859 * which, combined with task_create, can
7860 * result in the different entries
7861 * having different needs_copy values.)
7862 * The shadowed flag in the object allows
7863 * us to detect this case. The problem
7864 * with this case is that if this object
7865 * has or will have shadows, then we
7866 * must not perform an asymmetric copy
7867 * of this object, since such a copy
7868 * allows the object to be changed, which
7869 * will break the previous symmetrical
7870 * copies (which rely upon the object
7871 * not changing). In a sense, the shadowed
7872 * flag says "don't change this object".
7873 * We fix this by creating a shadow
7874 * object for this object, and sharing
7875 * that. This works because we are free
7876 * to change the shadow object (and thus
7877 * to use an asymmetric copy strategy);
7878 * this is also semantically correct,
7879 * since this object is temporary, and
7880 * therefore a copy of the object is
7881 * as good as the object itself. (This
7882 * is not true for permanent objects,
7883 * since the pager needs to see changes,
7884 * which won't happen if the changes
7885 * are made to a copy.)
7886 *
7887 * The third case is when the object
7888 * to be shared has parts sticking
7889 * outside of the entry we're working
7890 * with, and thus may in the future
7891 * be subject to a symmetrical copy.
7892 * (This is a preemptive version of
7893 * case 2.)
7894 */
7895
7896 vm_object_shadow(&old_entry->object.vm_object,
7897 &old_entry->offset,
7898 (vm_map_size_t) (old_entry->vme_end -
7899 old_entry->vme_start));
7900
7901 /*
7902 * If we're making a shadow for other than
7903 * copy on write reasons, then we have
7904 * to remove write permission.
7905 */
7906
7907 if (!old_entry->needs_copy &&
7908 (old_entry->protection & VM_PROT_WRITE)) {
7909 vm_prot_t prot;
7910
7911 prot = old_entry->protection & ~VM_PROT_WRITE;
7912
7913 if (override_nx(old_map, old_entry->alias) && prot)
7914 prot |= VM_PROT_EXECUTE;
7915
7916 if (old_map->mapped) {
7917 vm_object_pmap_protect(
7918 old_entry->object.vm_object,
7919 old_entry->offset,
7920 (old_entry->vme_end -
7921 old_entry->vme_start),
7922 PMAP_NULL,
7923 old_entry->vme_start,
7924 prot);
7925 } else {
7926 pmap_protect(old_map->pmap,
7927 old_entry->vme_start,
7928 old_entry->vme_end,
7929 prot);
7930 }
7931 }
7932
7933 old_entry->needs_copy = FALSE;
7934 object = old_entry->object.vm_object;
7935 }
7936
7937 /*
7938 * If object was using a symmetric copy strategy,
7939 * change its copy strategy to the default
7940 * asymmetric copy strategy, which is copy_delay
7941 * in the non-norma case and copy_call in the
7942 * norma case. Bump the reference count for the
7943 * new entry.
7944 */
7945
7946 if(old_entry->is_sub_map) {
7947 vm_map_lock(old_entry->object.sub_map);
7948 vm_map_reference(old_entry->object.sub_map);
7949 vm_map_unlock(old_entry->object.sub_map);
7950 } else {
7951 vm_object_lock(object);
7952 vm_object_reference_locked(object);
7953 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
7954 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
7955 }
7956 vm_object_unlock(object);
7957 }
7958
7959 /*
7960 * Clone the entry, using object ref from above.
7961 * Mark both entries as shared.
7962 */
7963
7964 new_entry = vm_map_entry_create(new_map);
7965 vm_map_entry_copy(new_entry, old_entry);
7966 old_entry->is_shared = TRUE;
7967 new_entry->is_shared = TRUE;
7968
7969 /*
7970 * Insert the entry into the new map -- we
7971 * know we're inserting at the end of the new
7972 * map.
7973 */
7974
7975 vm_map_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
7976
7977 /*
7978 * Update the physical map
7979 */
7980
7981 if (old_entry->is_sub_map) {
7982 /* Bill Angell pmap support goes here */
7983 } else {
7984 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
7985 old_entry->vme_end - old_entry->vme_start,
7986 old_entry->vme_start);
7987 }
7988 }
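
/*
 * Illustrative sketch, not part of the original source, of the three-case
 * test in vm_map_fork_share() above (reached only after the earlier
 * branches have ruled out submaps, null objects and asymmetric copy
 * strategies): a shadow is needed to complete a deferred symmetric copy
 * (case 1), to protect existing symmetric copies of an already-shadowed
 * object (case 2), or to pre-empt future symmetric copies of an object
 * that extends beyond this entry (case 3).  The helper name is
 * hypothetical.
 */
static boolean_t
example_fork_share_needs_shadow(
	vm_map_entry_t	old_entry)
{
	vm_object_t	object = old_entry->object.vm_object;

	return (old_entry->needs_copy ||		/* case 1 */
		object->shadowed ||			/* case 2 */
		(!object->true_share &&			/* case 3 */
		 !old_entry->is_shared &&
		 (object->size >
		  (vm_map_size_t)(old_entry->vme_end -
				  old_entry->vme_start))));
}
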
7989
7990 static boolean_t
7991 vm_map_fork_copy(
7992 vm_map_t old_map,
7993 vm_map_entry_t *old_entry_p,
7994 vm_map_t new_map)
7995 {
7996 vm_map_entry_t old_entry = *old_entry_p;
7997 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
7998 vm_map_offset_t start = old_entry->vme_start;
7999 vm_map_copy_t copy;
8000 vm_map_entry_t last = vm_map_last_entry(new_map);
8001
8002 vm_map_unlock(old_map);
8003 /*
8004 * Use maxprot version of copyin because we
8005 * care about whether this memory can ever
8006 * be accessed, not just whether it's accessible
8007 * right now.
8008 */
8009 if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, &copy)
8010 != KERN_SUCCESS) {
8011 /*
8012 * The map might have changed while it
8013 * was unlocked, check it again. Skip
8014 * any blank space or permanently
8015 * unreadable region.
8016 */
8017 vm_map_lock(old_map);
8018 if (!vm_map_lookup_entry(old_map, start, &last) ||
8019 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
8020 last = last->vme_next;
8021 }
8022 *old_entry_p = last;
8023
8024 /*
8025 * XXX For some error returns, want to
8026 * XXX skip to the next element. Note
8027 * that INVALID_ADDRESS and
8028 * PROTECTION_FAILURE are handled above.
8029 */
8030
8031 return FALSE;
8032 }
8033
8034 /*
8035 * Insert the copy into the new map
8036 */
8037
8038 vm_map_copy_insert(new_map, last, copy);
8039
8040 /*
8041 * Pick up the traversal at the end of
8042 * the copied region.
8043 */
8044
8045 vm_map_lock(old_map);
8046 start += entry_size;
8047 if (! vm_map_lookup_entry(old_map, start, &last)) {
8048 last = last->vme_next;
8049 } else {
8050 if (last->vme_start == start) {
8051 /*
8052 * No need to clip here and we don't
8053 * want to cause any unnecessary
8054 * unnesting...
8055 */
8056 } else {
8057 vm_map_clip_start(old_map, last, start);
8058 }
8059 }
8060 *old_entry_p = last;
8061
8062 return TRUE;
8063 }
8064
8065 /*
8066 * vm_map_fork:
8067 *
8068 * Create and return a new map based on the old
8069 * map, according to the inheritance values on the
8070 * regions in that map.
8071 *
8072 * The source map must not be locked.
8073 */
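/*
 * Editor's illustration (a simplified sketch, not part of the original
 * source): the typical caller is the fork()/task-creation path, which
 * hands the parent's map to this routine and installs the result in
 * the child (names simplified):
 *
 *	vm_map_t child_map;
 *
 *	child_map = vm_map_fork(parent_task->map);
 *	child_task->map = child_map;
 *
 * Regions marked VM_INHERIT_SHARE end up shared with the child,
 * VM_INHERIT_COPY regions are copied (copy-on-write where possible)
 * and VM_INHERIT_NONE regions are omitted from the child entirely.
 */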
8074 vm_map_t
8075 vm_map_fork(
8076 vm_map_t old_map)
8077 {
8078 pmap_t new_pmap;
8079 vm_map_t new_map;
8080 vm_map_entry_t old_entry;
8081 vm_map_size_t new_size = 0, entry_size;
8082 vm_map_entry_t new_entry;
8083 boolean_t src_needs_copy;
8084 boolean_t new_entry_needs_copy;
8085
8086 new_pmap = pmap_create((vm_map_size_t) 0,
8087 #if defined(__i386__) || defined(__x86_64__)
8088 old_map->pmap->pm_task_map != TASK_MAP_32BIT
8089 #else
8090 0
8091 #endif
8092 );
8093 #if defined(__i386__)
8094 if (old_map->pmap->pm_task_map == TASK_MAP_64BIT_SHARED)
8095 pmap_set_4GB_pagezero(new_pmap);
8096 #endif
8097
8098 vm_map_reference_swap(old_map);
8099 vm_map_lock(old_map);
8100
8101 new_map = vm_map_create(new_pmap,
8102 old_map->min_offset,
8103 old_map->max_offset,
8104 old_map->hdr.entries_pageable);
8105
8106 for (
8107 old_entry = vm_map_first_entry(old_map);
8108 old_entry != vm_map_to_entry(old_map);
8109 ) {
8110
8111 entry_size = old_entry->vme_end - old_entry->vme_start;
8112
8113 switch (old_entry->inheritance) {
8114 case VM_INHERIT_NONE:
8115 break;
8116
8117 case VM_INHERIT_SHARE:
8118 vm_map_fork_share(old_map, old_entry, new_map);
8119 new_size += entry_size;
8120 break;
8121
8122 case VM_INHERIT_COPY:
8123
8124 /*
8125 * Inline the copy_quickly case;
8126 * upon failure, fall back on call
8127 * to vm_map_fork_copy.
8128 */
8129
8130 if(old_entry->is_sub_map)
8131 break;
8132 if ((old_entry->wired_count != 0) ||
8133 ((old_entry->object.vm_object != NULL) &&
8134 (old_entry->object.vm_object->true_share))) {
8135 goto slow_vm_map_fork_copy;
8136 }
8137
8138 new_entry = vm_map_entry_create(new_map);
8139 vm_map_entry_copy(new_entry, old_entry);
8140 /* clear address space specifics */
8141 new_entry->use_pmap = FALSE;
8142
8143 if (! vm_object_copy_quickly(
8144 &new_entry->object.vm_object,
8145 old_entry->offset,
8146 (old_entry->vme_end -
8147 old_entry->vme_start),
8148 &src_needs_copy,
8149 &new_entry_needs_copy)) {
8150 vm_map_entry_dispose(new_map, new_entry);
8151 goto slow_vm_map_fork_copy;
8152 }
8153
8154 /*
8155 * Handle copy-on-write obligations
8156 */
8157
8158 if (src_needs_copy && !old_entry->needs_copy) {
8159 vm_prot_t prot;
8160
8161 prot = old_entry->protection & ~VM_PROT_WRITE;
8162
8163 if (override_nx(old_map, old_entry->alias) && prot)
8164 prot |= VM_PROT_EXECUTE;
8165
8166 vm_object_pmap_protect(
8167 old_entry->object.vm_object,
8168 old_entry->offset,
8169 (old_entry->vme_end -
8170 old_entry->vme_start),
8171 ((old_entry->is_shared
8172 || old_map->mapped)
8173 ? PMAP_NULL :
8174 old_map->pmap),
8175 old_entry->vme_start,
8176 prot);
8177
8178 old_entry->needs_copy = TRUE;
8179 }
8180 new_entry->needs_copy = new_entry_needs_copy;
8181
8182 /*
8183 * Insert the entry at the end
8184 * of the map.
8185 */
8186
8187 vm_map_entry_link(new_map, vm_map_last_entry(new_map),
8188 new_entry);
8189 new_size += entry_size;
8190 break;
8191
8192 slow_vm_map_fork_copy:
8193 if (vm_map_fork_copy(old_map, &old_entry, new_map)) {
8194 new_size += entry_size;
8195 }
8196 continue;
8197 }
8198 old_entry = old_entry->vme_next;
8199 }
8200
8201 new_map->size = new_size;
8202 vm_map_unlock(old_map);
8203 vm_map_deallocate(old_map);
8204
8205 return(new_map);
8206 }
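/*
 * Editor's note (a sketch, not part of the original source): the
 * inheritance values consumed by the switch above are set per region,
 * for example from user space via the Mach vm_inherit() interface
 * ("addr" and "size" are placeholders):
 *
 *	#include <mach/mach.h>
 *
 *	kern_return_t kr;
 *
 *	kr = vm_inherit(mach_task_self(), (vm_address_t)addr, size,
 *			VM_INHERIT_SHARE);
 *
 * after which the region is shared with, rather than copied into, any
 * subsequently forked child.
 */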
8207
8208 /*
8209 * vm_map_exec:
8210 *
8211 * Set up the "new_map" with the proper execution environment according
8212 * to the type of executable (platform, 64-bit, chroot environment).
8213 * Map the comm page and shared region, etc...
8214 */
8215 kern_return_t
8216 vm_map_exec(
8217 vm_map_t new_map,
8218 task_t task,
8219 void *fsroot,
8220 cpu_type_t cpu)
8221 {
8222 SHARED_REGION_TRACE_DEBUG(
8223 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
8224 current_task(), new_map, task, fsroot, cpu));
8225 (void) vm_commpage_enter(new_map, task);
8226 (void) vm_shared_region_enter(new_map, task, fsroot, cpu);
8227 SHARED_REGION_TRACE_DEBUG(
8228 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
8229 current_task(), new_map, task, fsroot, cpu));
8230 return KERN_SUCCESS;
8231 }
8232
8233 /*
8234 * vm_map_lookup_locked:
8235 *
8236 * Finds the VM object, offset, and
8237 * protection for a given virtual address in the
8238 * specified map, assuming a page fault of the
8239 * type specified.
8240 *
8241 * Returns the (object, offset, protection) for
8242 * this address, whether it is wired down, and whether
8243 * this map has the only reference to the data in question.
8244 * In order to later verify this lookup, a "version"
8245 * is returned.
8246 *
8247 * The map MUST be locked by the caller and WILL be
8248 * locked on exit. In order to guarantee the
8249 * existence of the returned object, it is returned
8250 * locked.
8251 *
8252 * If a lookup is requested with "write protection"
8253 * specified, the map may be changed to perform virtual
8254 * copying operations, although the data referenced will
8255 * remain the same.
8256 */
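/*
 * Editor's illustration (a simplified sketch, not part of the original
 * source): the page-fault path calls this with the map read-locked,
 * roughly as follows, and later re-validates the lookup with
 * vm_map_verify() (see further below):
 *
 *	vm_map_lock_read(map);
 *	kr = vm_map_lookup_locked(&map, vaddr, fault_type,
 *				  OBJECT_LOCK_EXCLUSIVE, &version,
 *				  &object, &offset, &prot, &wired,
 *				  &fault_info, &real_map);
 *	if (real_map != map)
 *		vm_map_unlock(real_map);
 *	vm_map_unlock_read(map);
 *	... resolve the fault against (object, offset) ...
 */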
8257 kern_return_t
8258 vm_map_lookup_locked(
8259 vm_map_t *var_map, /* IN/OUT */
8260 vm_map_offset_t vaddr,
8261 vm_prot_t fault_type,
8262 int object_lock_type,
8263 vm_map_version_t *out_version, /* OUT */
8264 vm_object_t *object, /* OUT */
8265 vm_object_offset_t *offset, /* OUT */
8266 vm_prot_t *out_prot, /* OUT */
8267 boolean_t *wired, /* OUT */
8268 vm_object_fault_info_t fault_info, /* OUT */
8269 vm_map_t *real_map)
8270 {
8271 vm_map_entry_t entry;
8272 register vm_map_t map = *var_map;
8273 vm_map_t old_map = *var_map;
8274 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
8275 vm_map_offset_t cow_parent_vaddr = 0;
8276 vm_map_offset_t old_start = 0;
8277 vm_map_offset_t old_end = 0;
8278 register vm_prot_t prot;
8279
8280 *real_map = map;
8281 RetryLookup: ;
8282
8283 /*
8284 * If the map has an interesting hint, try it before calling
8285 * full blown lookup routine.
8286 */
8287 entry = map->hint;
8288
8289 if ((entry == vm_map_to_entry(map)) ||
8290 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
8291 vm_map_entry_t tmp_entry;
8292
8293 /*
8294 * Entry was either not a valid hint, or the vaddr
8295 * was not contained in the entry, so do a full lookup.
8296 */
8297 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
8298 if((cow_sub_map_parent) && (cow_sub_map_parent != map))
8299 vm_map_unlock(cow_sub_map_parent);
8300 if((*real_map != map)
8301 && (*real_map != cow_sub_map_parent))
8302 vm_map_unlock(*real_map);
8303 return KERN_INVALID_ADDRESS;
8304 }
8305
8306 entry = tmp_entry;
8307 }
8308 if(map == old_map) {
8309 old_start = entry->vme_start;
8310 old_end = entry->vme_end;
8311 }
8312
8313 /*
8314 * Handle submaps. Drop lock on upper map, submap is
8315 * returned locked.
8316 */
8317
8318 submap_recurse:
8319 if (entry->is_sub_map) {
8320 vm_map_offset_t local_vaddr;
8321 vm_map_offset_t end_delta;
8322 vm_map_offset_t start_delta;
8323 vm_map_entry_t submap_entry;
8324 boolean_t mapped_needs_copy=FALSE;
8325
8326 local_vaddr = vaddr;
8327
8328 if ((entry->use_pmap && !(fault_type & VM_PROT_WRITE))) {
8329 /* if real_map equals map we unlock below */
8330 if ((*real_map != map) &&
8331 (*real_map != cow_sub_map_parent))
8332 vm_map_unlock(*real_map);
8333 *real_map = entry->object.sub_map;
8334 }
8335
8336 if(entry->needs_copy && (fault_type & VM_PROT_WRITE)) {
8337 if (!mapped_needs_copy) {
8338 if (vm_map_lock_read_to_write(map)) {
8339 vm_map_lock_read(map);
8340 /* XXX FBDP: entry still valid ? */
8341 if(*real_map == entry->object.sub_map)
8342 *real_map = map;
8343 goto RetryLookup;
8344 }
8345 vm_map_lock_read(entry->object.sub_map);
8346 cow_sub_map_parent = map;
8347 /* reset base to map before cow object */
8348 /* this is the map which will accept */
8349 /* the new cow object */
8350 old_start = entry->vme_start;
8351 old_end = entry->vme_end;
8352 cow_parent_vaddr = vaddr;
8353 mapped_needs_copy = TRUE;
8354 } else {
8355 vm_map_lock_read(entry->object.sub_map);
8356 if((cow_sub_map_parent != map) &&
8357 (*real_map != map))
8358 vm_map_unlock(map);
8359 }
8360 } else {
8361 vm_map_lock_read(entry->object.sub_map);
8362 /* leave map locked if it is a target */
8363 /* cow sub_map above otherwise, just */
8364 /* follow the maps down to the object */
8365 /* here we unlock knowing we are not */
8366 /* revisiting the map. */
8367 if((*real_map != map) && (map != cow_sub_map_parent))
8368 vm_map_unlock_read(map);
8369 }
8370
8371 /* XXX FBDP: map has been unlocked, what protects "entry" !? */
8372 *var_map = map = entry->object.sub_map;
8373
8374 /* calculate the offset in the submap for vaddr */
8375 local_vaddr = (local_vaddr - entry->vme_start) + entry->offset;
8376
8377 RetrySubMap:
8378 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
8379 if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
8380 vm_map_unlock(cow_sub_map_parent);
8381 }
8382 if((*real_map != map)
8383 && (*real_map != cow_sub_map_parent)) {
8384 vm_map_unlock(*real_map);
8385 }
8386 *real_map = map;
8387 return KERN_INVALID_ADDRESS;
8388 }
8389
8390 /* find the attenuated shadow of the underlying object */
8391 /* on our target map */
8392
8393 /* In plain English: the submap object may extend beyond the */
8394 /* region mapped by the entry, or may fill only a portion of */
8395 /* it. For our purposes, we only care about the case where */
8396 /* the object doesn't fill the entry. In that case the area */
8397 /* which will ultimately be clipped in the top map only needs */
8398 /* to be as big as the portion of the underlying entry which */
8399 /* is actually mapped. */
8400 start_delta = submap_entry->vme_start > entry->offset ?
8401 submap_entry->vme_start - entry->offset : 0;
8402
8403 end_delta =
8404 (entry->offset + start_delta + (old_end - old_start)) <=
8405 submap_entry->vme_end ?
8406 0 : (entry->offset +
8407 (old_end - old_start))
8408 - submap_entry->vme_end;
8409
8410 old_start += start_delta;
8411 old_end -= end_delta;
8412
8413 if(submap_entry->is_sub_map) {
8414 entry = submap_entry;
8415 vaddr = local_vaddr;
8416 goto submap_recurse;
8417 }
8418
8419 if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) {
8420
8421 vm_object_t sub_object, copy_object;
8422 vm_object_offset_t copy_offset;
8423 vm_map_offset_t local_start;
8424 vm_map_offset_t local_end;
8425 boolean_t copied_slowly = FALSE;
8426
8427 if (vm_map_lock_read_to_write(map)) {
8428 vm_map_lock_read(map);
8429 old_start -= start_delta;
8430 old_end += end_delta;
8431 goto RetrySubMap;
8432 }
8433
8434
8435 sub_object = submap_entry->object.vm_object;
8436 if (sub_object == VM_OBJECT_NULL) {
8437 sub_object =
8438 vm_object_allocate(
8439 (vm_map_size_t)
8440 (submap_entry->vme_end -
8441 submap_entry->vme_start));
8442 submap_entry->object.vm_object = sub_object;
8443 submap_entry->offset = 0;
8444 }
8445 local_start = local_vaddr -
8446 (cow_parent_vaddr - old_start);
8447 local_end = local_vaddr +
8448 (old_end - cow_parent_vaddr);
8449 vm_map_clip_start(map, submap_entry, local_start);
8450 vm_map_clip_end(map, submap_entry, local_end);
8451 /* unnesting was done in vm_map_clip_start/end() */
8452 assert(!submap_entry->use_pmap);
8453
8454 /* This is the COW case; let's connect */
8455 /* an entry in our space to the underlying */
8456 /* object in the submap, bypassing the */
8457 /* submap. */
8458
8459
8460 if(submap_entry->wired_count != 0 ||
8461 (sub_object->copy_strategy ==
8462 MEMORY_OBJECT_COPY_NONE)) {
8463 vm_object_lock(sub_object);
8464 vm_object_copy_slowly(sub_object,
8465 submap_entry->offset,
8466 (submap_entry->vme_end -
8467 submap_entry->vme_start),
8468 FALSE,
8469 &copy_object);
8470 copied_slowly = TRUE;
8471 } else {
8472
8473 /* set up shadow object */
8474 copy_object = sub_object;
8475 vm_object_reference(copy_object);
8476 sub_object->shadowed = TRUE;
8477 submap_entry->needs_copy = TRUE;
8478
8479 prot = submap_entry->protection & ~VM_PROT_WRITE;
8480
8481 if (override_nx(map, submap_entry->alias) && prot)
8482 prot |= VM_PROT_EXECUTE;
8483
8484 vm_object_pmap_protect(
8485 sub_object,
8486 submap_entry->offset,
8487 submap_entry->vme_end -
8488 submap_entry->vme_start,
8489 (submap_entry->is_shared
8490 || map->mapped) ?
8491 PMAP_NULL : map->pmap,
8492 submap_entry->vme_start,
8493 prot);
8494 }
8495
8496 /*
8497 * Adjust the fault offset to the submap entry.
8498 */
8499 copy_offset = (local_vaddr -
8500 submap_entry->vme_start +
8501 submap_entry->offset);
8502
8503 /* This works differently from the */
8504 /* normal submap case. We go back */
8505 /* to the parent of the cow map and */
8506 /* clip out the target portion of */
8507 /* the sub_map, substituting the */
8508 /* new copy object. */
8509
8510 vm_map_unlock(map);
8511 local_start = old_start;
8512 local_end = old_end;
8513 map = cow_sub_map_parent;
8514 *var_map = cow_sub_map_parent;
8515 vaddr = cow_parent_vaddr;
8516 cow_sub_map_parent = NULL;
8517
8518 if(!vm_map_lookup_entry(map,
8519 vaddr, &entry)) {
8520 vm_object_deallocate(
8521 copy_object);
8522 vm_map_lock_write_to_read(map);
8523 return KERN_INVALID_ADDRESS;
8524 }
8525
8526 /* clip out the portion of space */
8527 /* mapped by the sub map which */
8528 /* corresponds to the underlying */
8529 /* object */
8530
8531 /*
8532 * Clip (and unnest) the smallest nested chunk
8533 * possible around the faulting address...
8534 */
8535 local_start = vaddr & ~(pmap_nesting_size_min - 1);
8536 local_end = local_start + pmap_nesting_size_min;
8537 /*
8538 * ... but don't go beyond the "old_start" to "old_end"
8539 * range, to avoid spanning over another VM region
8540 * with a possibly different VM object and/or offset.
8541 */
8542 if (local_start < old_start) {
8543 local_start = old_start;
8544 }
8545 if (local_end > old_end) {
8546 local_end = old_end;
8547 }
8548 /*
8549 * Adjust copy_offset to the start of the range.
8550 */
8551 copy_offset -= (vaddr - local_start);
8552
8553 vm_map_clip_start(map, entry, local_start);
8554 vm_map_clip_end(map, entry, local_end);
8555 /* unnesting was done in vm_map_clip_start/end() */
8556 assert(!entry->use_pmap);
8557
8558 /* substitute copy object for */
8559 /* shared map entry */
8560 vm_map_deallocate(entry->object.sub_map);
8561 entry->is_sub_map = FALSE;
8562 entry->object.vm_object = copy_object;
8563
8564 /* propagate the submap entry's protections */
8565 entry->protection |= submap_entry->protection;
8566 entry->max_protection |= submap_entry->max_protection;
8567
8568 if(copied_slowly) {
8569 entry->offset = local_start - old_start;
8570 entry->needs_copy = FALSE;
8571 entry->is_shared = FALSE;
8572 } else {
8573 entry->offset = copy_offset;
8574 entry->needs_copy = TRUE;
8575 if(entry->inheritance == VM_INHERIT_SHARE)
8576 entry->inheritance = VM_INHERIT_COPY;
8577 if (map != old_map)
8578 entry->is_shared = TRUE;
8579 }
8580 if(entry->inheritance == VM_INHERIT_SHARE)
8581 entry->inheritance = VM_INHERIT_COPY;
8582
8583 vm_map_lock_write_to_read(map);
8584 } else {
8585 if((cow_sub_map_parent)
8586 && (cow_sub_map_parent != *real_map)
8587 && (cow_sub_map_parent != map)) {
8588 vm_map_unlock(cow_sub_map_parent);
8589 }
8590 entry = submap_entry;
8591 vaddr = local_vaddr;
8592 }
8593 }
8594
8595 /*
8596 * Check whether this task is allowed to have
8597 * this page.
8598 */
8599
8600 prot = entry->protection;
8601
8602 if (override_nx(map, entry->alias) && prot) {
8603 /*
8604 * HACK -- if not a stack, then allow execution
8605 */
8606 prot |= VM_PROT_EXECUTE;
8607 }
8608
8609 if ((fault_type & (prot)) != fault_type) {
8610 if (*real_map != map) {
8611 vm_map_unlock(*real_map);
8612 }
8613 *real_map = map;
8614
8615 if ((fault_type & VM_PROT_EXECUTE) && prot)
8616 log_stack_execution_failure((addr64_t)vaddr, prot);
8617
8618 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
8619 return KERN_PROTECTION_FAILURE;
8620 }
8621
8622 /*
8623 * If this page is not pageable, we have to get
8624 * it for all possible accesses.
8625 */
8626
8627 *wired = (entry->wired_count != 0);
8628 if (*wired)
8629 fault_type = prot;
8630
8631 /*
8632 * If the entry was copy-on-write, we either shadow it or demote the permissions, as decided below.
8633 */
8634
8635 if (entry->needs_copy) {
8636 /*
8637 * If we want to write the page, we may as well
8638 * handle that now since we've got the map locked.
8639 *
8640 * If we don't need to write the page, we just
8641 * demote the permissions allowed.
8642 */
8643
8644 if ((fault_type & VM_PROT_WRITE) || *wired) {
8645 /*
8646 * Make a new object, and place it in the
8647 * object chain. Note that no new references
8648 * have appeared -- one just moved from the
8649 * map to the new object.
8650 */
8651
8652 if (vm_map_lock_read_to_write(map)) {
8653 vm_map_lock_read(map);
8654 goto RetryLookup;
8655 }
8656 vm_object_shadow(&entry->object.vm_object,
8657 &entry->offset,
8658 (vm_map_size_t) (entry->vme_end -
8659 entry->vme_start));
8660
8661 entry->object.vm_object->shadowed = TRUE;
8662 entry->needs_copy = FALSE;
8663 vm_map_lock_write_to_read(map);
8664 }
8665 else {
8666 /*
8667 * We're attempting to read a copy-on-write
8668 * page -- don't allow writes.
8669 */
8670
8671 prot &= (~VM_PROT_WRITE);
8672 }
8673 }
8674
8675 /*
8676 * Create an object if necessary.
8677 */
8678 if (entry->object.vm_object == VM_OBJECT_NULL) {
8679
8680 if (vm_map_lock_read_to_write(map)) {
8681 vm_map_lock_read(map);
8682 goto RetryLookup;
8683 }
8684
8685 entry->object.vm_object = vm_object_allocate(
8686 (vm_map_size_t)(entry->vme_end - entry->vme_start));
8687 entry->offset = 0;
8688 vm_map_lock_write_to_read(map);
8689 }
8690
8691 /*
8692 * Return the object/offset from this entry. If the entry
8693 * was copy-on-write or empty, it has been fixed up. Also
8694 * return the protection.
8695 */
8696
8697 *offset = (vaddr - entry->vme_start) + entry->offset;
8698 *object = entry->object.vm_object;
8699 *out_prot = prot;
8700
8701 if (fault_info) {
8702 fault_info->interruptible = THREAD_UNINT; /* for now... */
8703 /* ... the caller will change "interruptible" if needed */
8704 fault_info->cluster_size = 0;
8705 fault_info->user_tag = entry->alias;
8706 fault_info->behavior = entry->behavior;
8707 fault_info->lo_offset = entry->offset;
8708 fault_info->hi_offset = (entry->vme_end - entry->vme_start) + entry->offset;
8709 fault_info->no_cache = entry->no_cache;
8710 fault_info->stealth = FALSE;
8711 }
8712
8713 /*
8714 * Lock the object to prevent it from disappearing
8715 */
8716 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
8717 vm_object_lock(*object);
8718 else
8719 vm_object_lock_shared(*object);
8720
8721 /*
8722 * Save the version number
8723 */
8724
8725 out_version->main_timestamp = map->timestamp;
8726
8727 return KERN_SUCCESS;
8728 }
8729
8730
8731 /*
8732 * vm_map_verify:
8733 *
8734 * Verifies that the map in question has not changed
8735 * since the given version. If successful, the map
8736 * will not change until vm_map_verify_done() is called.
8737 */
8738 boolean_t
8739 vm_map_verify(
8740 register vm_map_t map,
8741 register vm_map_version_t *version) /* REF */
8742 {
8743 boolean_t result;
8744
8745 vm_map_lock_read(map);
8746 result = (map->timestamp == version->main_timestamp);
8747
8748 if (!result)
8749 vm_map_unlock_read(map);
8750
8751 return(result);
8752 }
8753
8754 /*
8755 * vm_map_verify_done:
8756 *
8757 * Releases locks acquired by a vm_map_verify.
8758 *
8759 * This is now a macro in vm/vm_map.h. It does a
8760 * vm_map_unlock_read on the map.
8761 */
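/*
 * Editor's note (a sketch, not part of the original source): the usual
 * pairing with the lookup above is
 *
 *	if (vm_map_verify(map, &version)) {
 *		... map is read-locked and unchanged: safe to commit ...
 *		vm_map_verify_done(map, &version);
 *	} else {
 *		... timestamp changed: redo the lookup ...
 *	}
 */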
8762
8763
8764 /*
8765 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
8766 * Goes away after regular vm_region_recurse function migrates to
8767 * 64 bits
8768 * vm_region_recurse: A form of vm_region which follows the
8769 * submaps in a target map
8770 *
8771 */
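/*
 * Editor's illustration (a user-space sketch, not part of the original
 * source): this routine backs mach_vm_region_recurse(), which walks a
 * task's address space one region at a time, descending into submaps:
 *
 *	mach_vm_address_t addr = 0;
 *	mach_vm_size_t size;
 *	natural_t depth = 0;
 *	struct vm_region_submap_info_64 info;
 *	mach_msg_type_number_t count = VM_REGION_SUBMAP_INFO_COUNT_64;
 *
 *	kr = mach_vm_region_recurse(mach_task_self(), &addr, &size,
 *				    &depth, (vm_region_recurse_info_t)&info,
 *				    &count);
 *
 * On success, "addr"/"size" describe the region containing (or
 * following) the requested address and "depth" reports the submap
 * nesting level.
 */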
8772
8773 kern_return_t
8774 vm_map_region_recurse_64(
8775 vm_map_t map,
8776 vm_map_offset_t *address, /* IN/OUT */
8777 vm_map_size_t *size, /* OUT */
8778 natural_t *nesting_depth, /* IN/OUT */
8779 vm_region_submap_info_64_t submap_info, /* IN/OUT */
8780 mach_msg_type_number_t *count) /* IN/OUT */
8781 {
8782 vm_region_extended_info_data_t extended;
8783 vm_map_entry_t tmp_entry;
8784 vm_map_offset_t user_address;
8785 unsigned int user_max_depth;
8786
8787 /*
8788 * "curr_entry" is the VM map entry preceding or including the
8789 * address we're looking for.
8790 * "curr_map" is the map or sub-map containing "curr_entry".
8791 * "curr_offset" is the cumulated offset of "curr_map" in the
8792 * target task's address space.
8793 * "curr_depth" is the depth of "curr_map" in the chain of
8794 * sub-maps.
8795 * "curr_max_offset" is the maximum offset we should take into
8796 * account in the current map. It may be smaller than the current
8797 * map's "max_offset" because we might not have mapped it all in
8798 * the upper level map.
8799 */
8800 vm_map_entry_t curr_entry;
8801 vm_map_offset_t curr_offset;
8802 vm_map_t curr_map;
8803 unsigned int curr_depth;
8804 vm_map_offset_t curr_max_offset;
8805
8806 /*
8807 * "next_" is the same as "curr_" but for the VM region immediately
8808 * after the address we're looking for. We need to keep track of this
8809 * too because we want to return info about that region if the
8810 * address we're looking for is not mapped.
8811 */
8812 vm_map_entry_t next_entry;
8813 vm_map_offset_t next_offset;
8814 vm_map_t next_map;
8815 unsigned int next_depth;
8816 vm_map_offset_t next_max_offset;
8817
8818 boolean_t look_for_pages;
8819 vm_region_submap_short_info_64_t short_info;
8820
8821 if (map == VM_MAP_NULL) {
8822 /* no address space to work on */
8823 return KERN_INVALID_ARGUMENT;
8824 }
8825
8826 if (*count < VM_REGION_SUBMAP_INFO_COUNT_64) {
8827 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
8828 /*
8829 * "info" structure is not big enough and
8830 * would overflow
8831 */
8832 return KERN_INVALID_ARGUMENT;
8833 } else {
8834 look_for_pages = FALSE;
8835 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
8836 short_info = (vm_region_submap_short_info_64_t) submap_info;
8837 submap_info = NULL;
8838 }
8839 } else {
8840 look_for_pages = TRUE;
8841 *count = VM_REGION_SUBMAP_INFO_COUNT_64;
8842 short_info = NULL;
8843 }
8844
8845
8846 user_address = *address;
8847 user_max_depth = *nesting_depth;
8848
8849 curr_entry = NULL;
8850 curr_map = map;
8851 curr_offset = 0;
8852 curr_depth = 0;
8853 curr_max_offset = curr_map->max_offset;
8854
8855 next_entry = NULL;
8856 next_map = NULL;
8857 next_offset = 0;
8858 next_depth = 0;
8859 next_max_offset = curr_max_offset;
8860
8861 if (not_in_kdp) {
8862 vm_map_lock_read(curr_map);
8863 }
8864
8865 for (;;) {
8866 if (vm_map_lookup_entry(curr_map,
8867 user_address - curr_offset,
8868 &tmp_entry)) {
8869 /* tmp_entry contains the address we're looking for */
8870 curr_entry = tmp_entry;
8871 } else {
8872 /*
8873 * The address is not mapped. "tmp_entry" is the
8874 * map entry preceding the address. We want the next
8875 * one, if it exists.
8876 */
8877 curr_entry = tmp_entry->vme_next;
8878 if (curr_entry == vm_map_to_entry(curr_map) ||
8879 curr_entry->vme_start >= curr_max_offset) {
8880 /* no next entry at this level: stop looking */
8881 if (not_in_kdp) {
8882 vm_map_unlock_read(curr_map);
8883 }
8884 curr_entry = NULL;
8885 curr_map = NULL;
8886 curr_offset = 0;
8887 curr_depth = 0;
8888 curr_max_offset = 0;
8889 break;
8890 }
8891 }
8892
8893 /*
8894 * Is the next entry at this level closer to the address (or
8895 * deeper in the submap chain) than the one we had
8896 * so far ?
8897 */
8898 tmp_entry = curr_entry->vme_next;
8899 if (tmp_entry == vm_map_to_entry(curr_map)) {
8900 /* no next entry at this level */
8901 } else if (tmp_entry->vme_start >= curr_max_offset) {
8902 /*
8903 * tmp_entry is beyond the scope of what we mapped of
8904 * this submap in the upper level: ignore it.
8905 */
8906 } else if ((next_entry == NULL) ||
8907 (tmp_entry->vme_start + curr_offset <=
8908 next_entry->vme_start + next_offset)) {
8909 /*
8910 * We didn't have a "next_entry" or this one is
8911 * closer to the address we're looking for:
8912 * use this "tmp_entry" as the new "next_entry".
8913 */
8914 if (next_entry != NULL) {
8915 /* unlock the last "next_map" */
8916 if (next_map != curr_map && not_in_kdp) {
8917 vm_map_unlock_read(next_map);
8918 }
8919 }
8920 next_entry = tmp_entry;
8921 next_map = curr_map;
8922 next_offset = curr_offset;
8923 next_depth = curr_depth;
8924 next_max_offset = curr_max_offset;
8925 }
8926
8927 if (!curr_entry->is_sub_map ||
8928 curr_depth >= user_max_depth) {
8929 /*
8930 * We hit a leaf map or we reached the maximum depth
8931 * we could, so stop looking. Keep the current map
8932 * locked.
8933 */
8934 break;
8935 }
8936
8937 /*
8938 * Get down to the next submap level.
8939 */
8940
8941 /*
8942 * Lock the next level and unlock the current level,
8943 * unless we need to keep it locked to access the "next_entry"
8944 * later.
8945 */
8946 if (not_in_kdp) {
8947 vm_map_lock_read(curr_entry->object.sub_map);
8948 }
8949 if (curr_map == next_map) {
8950 /* keep "next_map" locked in case we need it */
8951 } else {
8952 /* release this map */
8953 if (not_in_kdp)
8954 vm_map_unlock_read(curr_map);
8955 }
8956
8957 /*
8958 * Adjust the offset. "curr_entry" maps the submap
8959 * at relative address "curr_entry->vme_start" in the
8960 * curr_map but skips the first "curr_entry->offset"
8961 * bytes of the submap.
8962 * "curr_offset" always represents the offset of a virtual
8963 * address in the curr_map relative to the absolute address
8964 * space (i.e. the top-level VM map).
8965 */
8966 curr_offset +=
8967 (curr_entry->vme_start - curr_entry->offset);
8968 /* switch to the submap */
8969 curr_map = curr_entry->object.sub_map;
8970 curr_depth++;
8971 /*
8972 * "curr_max_offset" allows us to keep track of the
8973 * portion of the submap that is actually mapped at this level:
8974 * the rest of that submap is irrelevant to us, since it's not
8975 * mapped here.
8976 * The relevant portion of the map starts at
8977 * "curr_entry->offset" up to the size of "curr_entry".
8978 */
8979 curr_max_offset =
8980 curr_entry->vme_end - curr_entry->vme_start +
8981 curr_entry->offset;
8982 curr_entry = NULL;
8983 }
8984
8985 if (curr_entry == NULL) {
8986 /* no VM region contains the address... */
8987 if (next_entry == NULL) {
8988 /* ... and no VM region follows it either */
8989 return KERN_INVALID_ADDRESS;
8990 }
8991 /* ... gather info about the next VM region */
8992 curr_entry = next_entry;
8993 curr_map = next_map; /* still locked ... */
8994 curr_offset = next_offset;
8995 curr_depth = next_depth;
8996 curr_max_offset = next_max_offset;
8997 } else {
8998 /* we won't need "next_entry" after all */
8999 if (next_entry != NULL) {
9000 /* release "next_map" */
9001 if (next_map != curr_map && not_in_kdp) {
9002 vm_map_unlock_read(next_map);
9003 }
9004 }
9005 }
9006 next_entry = NULL;
9007 next_map = NULL;
9008 next_offset = 0;
9009 next_depth = 0;
9010 next_max_offset = 0;
9011
9012 *nesting_depth = curr_depth;
9013 *size = curr_entry->vme_end - curr_entry->vme_start;
9014 *address = curr_entry->vme_start + curr_offset;
9015
9016 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
9017 // so probably should be a real 32b ID vs. ptr.
9018 // Current users just check for equality
9019 #define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)p)
9020
9021 if (look_for_pages) {
9022 submap_info->user_tag = curr_entry->alias;
9023 submap_info->offset = curr_entry->offset;
9024 submap_info->protection = curr_entry->protection;
9025 submap_info->inheritance = curr_entry->inheritance;
9026 submap_info->max_protection = curr_entry->max_protection;
9027 submap_info->behavior = curr_entry->behavior;
9028 submap_info->user_wired_count = curr_entry->user_wired_count;
9029 submap_info->is_submap = curr_entry->is_sub_map;
9030 submap_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
9031 } else {
9032 short_info->user_tag = curr_entry->alias;
9033 short_info->offset = curr_entry->offset;
9034 short_info->protection = curr_entry->protection;
9035 short_info->inheritance = curr_entry->inheritance;
9036 short_info->max_protection = curr_entry->max_protection;
9037 short_info->behavior = curr_entry->behavior;
9038 short_info->user_wired_count = curr_entry->user_wired_count;
9039 short_info->is_submap = curr_entry->is_sub_map;
9040 short_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
9041 }
9042
9043 extended.pages_resident = 0;
9044 extended.pages_swapped_out = 0;
9045 extended.pages_shared_now_private = 0;
9046 extended.pages_dirtied = 0;
9047 extended.external_pager = 0;
9048 extended.shadow_depth = 0;
9049
9050 if (not_in_kdp) {
9051 if (!curr_entry->is_sub_map) {
9052 vm_map_region_walk(curr_map,
9053 curr_entry->vme_start,
9054 curr_entry,
9055 curr_entry->offset,
9056 (curr_entry->vme_end -
9057 curr_entry->vme_start),
9058 &extended,
9059 look_for_pages);
9060 if (extended.external_pager &&
9061 extended.ref_count == 2 &&
9062 extended.share_mode == SM_SHARED) {
9063 extended.share_mode = SM_PRIVATE;
9064 }
9065 } else {
9066 if (curr_entry->use_pmap) {
9067 extended.share_mode = SM_TRUESHARED;
9068 } else {
9069 extended.share_mode = SM_PRIVATE;
9070 }
9071 extended.ref_count =
9072 curr_entry->object.sub_map->ref_count;
9073 }
9074 }
9075
9076 if (look_for_pages) {
9077 submap_info->pages_resident = extended.pages_resident;
9078 submap_info->pages_swapped_out = extended.pages_swapped_out;
9079 submap_info->pages_shared_now_private =
9080 extended.pages_shared_now_private;
9081 submap_info->pages_dirtied = extended.pages_dirtied;
9082 submap_info->external_pager = extended.external_pager;
9083 submap_info->shadow_depth = extended.shadow_depth;
9084 submap_info->share_mode = extended.share_mode;
9085 submap_info->ref_count = extended.ref_count;
9086 } else {
9087 short_info->external_pager = extended.external_pager;
9088 short_info->shadow_depth = extended.shadow_depth;
9089 short_info->share_mode = extended.share_mode;
9090 short_info->ref_count = extended.ref_count;
9091 }
9092
9093 if (not_in_kdp) {
9094 vm_map_unlock_read(curr_map);
9095 }
9096
9097 return KERN_SUCCESS;
9098 }
9099
9100 /*
9101 * vm_region:
9102 *
9103 * User call to obtain information about a region in
9104 * a task's address map. Several information flavors are
9105 * supported (basic, basic 64-bit, extended and top).
9106 *
9107 * XXX The reserved and behavior fields cannot be filled
9108 * in until the vm merge from the IK is completed, and
9109 * vm_reserve is implemented.
9110 */
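/*
 * Editor's illustration (a user-space sketch, not part of the original
 * source): the VM_REGION_BASIC_INFO_64 flavor handled below is what a
 * caller of mach_vm_region() typically asks for:
 *
 *	mach_vm_address_t addr = 0;
 *	mach_vm_size_t size;
 *	vm_region_basic_info_data_64_t info;
 *	mach_msg_type_number_t count = VM_REGION_BASIC_INFO_COUNT_64;
 *	mach_port_t object_name;
 *
 *	kr = mach_vm_region(mach_task_self(), &addr, &size,
 *			    VM_REGION_BASIC_INFO_64,
 *			    (vm_region_info_t)&info, &count, &object_name);
 */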
9111
9112 kern_return_t
9113 vm_map_region(
9114 vm_map_t map,
9115 vm_map_offset_t *address, /* IN/OUT */
9116 vm_map_size_t *size, /* OUT */
9117 vm_region_flavor_t flavor, /* IN */
9118 vm_region_info_t info, /* OUT */
9119 mach_msg_type_number_t *count, /* IN/OUT */
9120 mach_port_t *object_name) /* OUT */
9121 {
9122 vm_map_entry_t tmp_entry;
9123 vm_map_entry_t entry;
9124 vm_map_offset_t start;
9125
9126 if (map == VM_MAP_NULL)
9127 return(KERN_INVALID_ARGUMENT);
9128
9129 switch (flavor) {
9130
9131 case VM_REGION_BASIC_INFO:
9132 /* legacy for old 32-bit objects info */
9133 {
9134 vm_region_basic_info_t basic;
9135
9136 if (*count < VM_REGION_BASIC_INFO_COUNT)
9137 return(KERN_INVALID_ARGUMENT);
9138
9139 basic = (vm_region_basic_info_t) info;
9140 *count = VM_REGION_BASIC_INFO_COUNT;
9141
9142 vm_map_lock_read(map);
9143
9144 start = *address;
9145 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9146 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9147 vm_map_unlock_read(map);
9148 return(KERN_INVALID_ADDRESS);
9149 }
9150 } else {
9151 entry = tmp_entry;
9152 }
9153
9154 start = entry->vme_start;
9155
9156 basic->offset = (uint32_t)entry->offset;
9157 basic->protection = entry->protection;
9158 basic->inheritance = entry->inheritance;
9159 basic->max_protection = entry->max_protection;
9160 basic->behavior = entry->behavior;
9161 basic->user_wired_count = entry->user_wired_count;
9162 basic->reserved = entry->is_sub_map;
9163 *address = start;
9164 *size = (entry->vme_end - start);
9165
9166 if (object_name) *object_name = IP_NULL;
9167 if (entry->is_sub_map) {
9168 basic->shared = FALSE;
9169 } else {
9170 basic->shared = entry->is_shared;
9171 }
9172
9173 vm_map_unlock_read(map);
9174 return(KERN_SUCCESS);
9175 }
9176
9177 case VM_REGION_BASIC_INFO_64:
9178 {
9179 vm_region_basic_info_64_t basic;
9180
9181 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
9182 return(KERN_INVALID_ARGUMENT);
9183
9184 basic = (vm_region_basic_info_64_t) info;
9185 *count = VM_REGION_BASIC_INFO_COUNT_64;
9186
9187 vm_map_lock_read(map);
9188
9189 start = *address;
9190 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9191 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9192 vm_map_unlock_read(map);
9193 return(KERN_INVALID_ADDRESS);
9194 }
9195 } else {
9196 entry = tmp_entry;
9197 }
9198
9199 start = entry->vme_start;
9200
9201 basic->offset = entry->offset;
9202 basic->protection = entry->protection;
9203 basic->inheritance = entry->inheritance;
9204 basic->max_protection = entry->max_protection;
9205 basic->behavior = entry->behavior;
9206 basic->user_wired_count = entry->user_wired_count;
9207 basic->reserved = entry->is_sub_map;
9208 *address = start;
9209 *size = (entry->vme_end - start);
9210
9211 if (object_name) *object_name = IP_NULL;
9212 if (entry->is_sub_map) {
9213 basic->shared = FALSE;
9214 } else {
9215 basic->shared = entry->is_shared;
9216 }
9217
9218 vm_map_unlock_read(map);
9219 return(KERN_SUCCESS);
9220 }
9221 case VM_REGION_EXTENDED_INFO:
9222 {
9223 vm_region_extended_info_t extended;
9224
9225 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
9226 return(KERN_INVALID_ARGUMENT);
9227
9228 extended = (vm_region_extended_info_t) info;
9229 *count = VM_REGION_EXTENDED_INFO_COUNT;
9230
9231 vm_map_lock_read(map);
9232
9233 start = *address;
9234 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9235 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9236 vm_map_unlock_read(map);
9237 return(KERN_INVALID_ADDRESS);
9238 }
9239 } else {
9240 entry = tmp_entry;
9241 }
9242 start = entry->vme_start;
9243
9244 extended->protection = entry->protection;
9245 extended->user_tag = entry->alias;
9246 extended->pages_resident = 0;
9247 extended->pages_swapped_out = 0;
9248 extended->pages_shared_now_private = 0;
9249 extended->pages_dirtied = 0;
9250 extended->external_pager = 0;
9251 extended->shadow_depth = 0;
9252
9253 vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended, TRUE);
9254
9255 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
9256 extended->share_mode = SM_PRIVATE;
9257
9258 if (object_name)
9259 *object_name = IP_NULL;
9260 *address = start;
9261 *size = (entry->vme_end - start);
9262
9263 vm_map_unlock_read(map);
9264 return(KERN_SUCCESS);
9265 }
9266 case VM_REGION_TOP_INFO:
9267 {
9268 vm_region_top_info_t top;
9269
9270 if (*count < VM_REGION_TOP_INFO_COUNT)
9271 return(KERN_INVALID_ARGUMENT);
9272
9273 top = (vm_region_top_info_t) info;
9274 *count = VM_REGION_TOP_INFO_COUNT;
9275
9276 vm_map_lock_read(map);
9277
9278 start = *address;
9279 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9280 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9281 vm_map_unlock_read(map);
9282 return(KERN_INVALID_ADDRESS);
9283 }
9284 } else {
9285 entry = tmp_entry;
9286
9287 }
9288 start = entry->vme_start;
9289
9290 top->private_pages_resident = 0;
9291 top->shared_pages_resident = 0;
9292
9293 vm_map_region_top_walk(entry, top);
9294
9295 if (object_name)
9296 *object_name = IP_NULL;
9297 *address = start;
9298 *size = (entry->vme_end - start);
9299
9300 vm_map_unlock_read(map);
9301 return(KERN_SUCCESS);
9302 }
9303 default:
9304 return(KERN_INVALID_ARGUMENT);
9305 }
9306 }
9307
9308 #define OBJ_RESIDENT_COUNT(obj, entry_size) \
9309 MIN((entry_size), \
9310 ((obj)->all_reusable ? \
9311 (obj)->wired_page_count : \
9312 (obj)->resident_page_count - (obj)->reusable_page_count))
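/*
 * Editor's note (not part of the original source): OBJ_RESIDENT_COUNT()
 * estimates how many of an object's resident pages to charge to a
 * mapping of "entry_size" pages: all-reusable objects are charged only
 * their wired pages, other objects their resident pages minus the
 * reusable ones, and the result is capped at the mapping size.
 */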
9313
9314 void
9315 vm_map_region_top_walk(
9316 vm_map_entry_t entry,
9317 vm_region_top_info_t top)
9318 {
9319
9320 if (entry->object.vm_object == 0 || entry->is_sub_map) {
9321 top->share_mode = SM_EMPTY;
9322 top->ref_count = 0;
9323 top->obj_id = 0;
9324 return;
9325 }
9326
9327 {
9328 struct vm_object *obj, *tmp_obj;
9329 int ref_count;
9330 uint32_t entry_size;
9331
9332 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
9333
9334 obj = entry->object.vm_object;
9335
9336 vm_object_lock(obj);
9337
9338 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9339 ref_count--;
9340
9341 assert(obj->reusable_page_count <= obj->resident_page_count);
9342 if (obj->shadow) {
9343 if (ref_count == 1)
9344 top->private_pages_resident =
9345 OBJ_RESIDENT_COUNT(obj, entry_size);
9346 else
9347 top->shared_pages_resident =
9348 OBJ_RESIDENT_COUNT(obj, entry_size);
9349 top->ref_count = ref_count;
9350 top->share_mode = SM_COW;
9351
9352 while ((tmp_obj = obj->shadow)) {
9353 vm_object_lock(tmp_obj);
9354 vm_object_unlock(obj);
9355 obj = tmp_obj;
9356
9357 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9358 ref_count--;
9359
9360 assert(obj->reusable_page_count <= obj->resident_page_count);
9361 top->shared_pages_resident +=
9362 OBJ_RESIDENT_COUNT(obj, entry_size);
9363 top->ref_count += ref_count - 1;
9364 }
9365 } else {
9366 if (entry->needs_copy) {
9367 top->share_mode = SM_COW;
9368 top->shared_pages_resident =
9369 OBJ_RESIDENT_COUNT(obj, entry_size);
9370 } else {
9371 if (ref_count == 1 ||
9372 (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
9373 top->share_mode = SM_PRIVATE;
9374 top->private_pages_resident =
9375 OBJ_RESIDENT_COUNT(obj,
9376 entry_size);
9377 } else {
9378 top->share_mode = SM_SHARED;
9379 top->shared_pages_resident =
9380 OBJ_RESIDENT_COUNT(obj,
9381 entry_size);
9382 }
9383 }
9384 top->ref_count = ref_count;
9385 }
9386 /* XXX K64: obj_id will be truncated */
9387 top->obj_id = (unsigned int) (uintptr_t)obj;
9388
9389 vm_object_unlock(obj);
9390 }
9391 }
9392
9393 void
9394 vm_map_region_walk(
9395 vm_map_t map,
9396 vm_map_offset_t va,
9397 vm_map_entry_t entry,
9398 vm_object_offset_t offset,
9399 vm_object_size_t range,
9400 vm_region_extended_info_t extended,
9401 boolean_t look_for_pages)
9402 {
9403 register struct vm_object *obj, *tmp_obj;
9404 register vm_map_offset_t last_offset;
9405 register int i;
9406 register int ref_count;
9407 struct vm_object *shadow_object;
9408 int shadow_depth;
9409
9410 if ((entry->object.vm_object == 0) ||
9411 (entry->is_sub_map) ||
9412 (entry->object.vm_object->phys_contiguous)) {
9413 extended->share_mode = SM_EMPTY;
9414 extended->ref_count = 0;
9415 return;
9416 }
9417 {
9418 obj = entry->object.vm_object;
9419
9420 vm_object_lock(obj);
9421
9422 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9423 ref_count--;
9424
9425 if (look_for_pages) {
9426 for (last_offset = offset + range;
9427 offset < last_offset;
9428 offset += PAGE_SIZE_64, va += PAGE_SIZE)
9429 vm_map_region_look_for_page(map, va, obj,
9430 offset, ref_count,
9431 0, extended);
9432 } else {
9433 shadow_object = obj->shadow;
9434 shadow_depth = 0;
9435
9436 if ( !(obj->pager_trusted) && !(obj->internal))
9437 extended->external_pager = 1;
9438
9439 if (shadow_object != VM_OBJECT_NULL) {
9440 vm_object_lock(shadow_object);
9441 for (;
9442 shadow_object != VM_OBJECT_NULL;
9443 shadow_depth++) {
9444 vm_object_t next_shadow;
9445
9446 if ( !(shadow_object->pager_trusted) &&
9447 !(shadow_object->internal))
9448 extended->external_pager = 1;
9449
9450 next_shadow = shadow_object->shadow;
9451 if (next_shadow) {
9452 vm_object_lock(next_shadow);
9453 }
9454 vm_object_unlock(shadow_object);
9455 shadow_object = next_shadow;
9456 }
9457 }
9458 extended->shadow_depth = shadow_depth;
9459 }
9460
9461 if (extended->shadow_depth || entry->needs_copy)
9462 extended->share_mode = SM_COW;
9463 else {
9464 if (ref_count == 1)
9465 extended->share_mode = SM_PRIVATE;
9466 else {
9467 if (obj->true_share)
9468 extended->share_mode = SM_TRUESHARED;
9469 else
9470 extended->share_mode = SM_SHARED;
9471 }
9472 }
9473 extended->ref_count = ref_count - extended->shadow_depth;
9474
9475 for (i = 0; i < extended->shadow_depth; i++) {
9476 if ((tmp_obj = obj->shadow) == 0)
9477 break;
9478 vm_object_lock(tmp_obj);
9479 vm_object_unlock(obj);
9480
9481 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
9482 ref_count--;
9483
9484 extended->ref_count += ref_count;
9485 obj = tmp_obj;
9486 }
9487 vm_object_unlock(obj);
9488
9489 if (extended->share_mode == SM_SHARED) {
9490 register vm_map_entry_t cur;
9491 register vm_map_entry_t last;
9492 int my_refs;
9493
9494 obj = entry->object.vm_object;
9495 last = vm_map_to_entry(map);
9496 my_refs = 0;
9497
9498 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9499 ref_count--;
9500 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
9501 my_refs += vm_map_region_count_obj_refs(cur, obj);
9502
9503 if (my_refs == ref_count)
9504 extended->share_mode = SM_PRIVATE_ALIASED;
9505 else if (my_refs > 1)
9506 extended->share_mode = SM_SHARED_ALIASED;
9507 }
9508 }
9509 }
9510
9511
9512 /* object is locked on entry and locked on return */
9513
9514
9515 static void
9516 vm_map_region_look_for_page(
9517 __unused vm_map_t map,
9518 __unused vm_map_offset_t va,
9519 vm_object_t object,
9520 vm_object_offset_t offset,
9521 int max_refcnt,
9522 int depth,
9523 vm_region_extended_info_t extended)
9524 {
9525 register vm_page_t p;
9526 register vm_object_t shadow;
9527 register int ref_count;
9528 vm_object_t caller_object;
9529 #if MACH_PAGEMAP
9530 kern_return_t kr;
9531 #endif
9532 shadow = object->shadow;
9533 caller_object = object;
9534
9535
9536 while (TRUE) {
9537
9538 if ( !(object->pager_trusted) && !(object->internal))
9539 extended->external_pager = 1;
9540
9541 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
9542 if (shadow && (max_refcnt == 1))
9543 extended->pages_shared_now_private++;
9544
9545 if (!p->fictitious &&
9546 (p->dirty || pmap_is_modified(p->phys_page)))
9547 extended->pages_dirtied++;
9548
9549 extended->pages_resident++;
9550
9551 if(object != caller_object)
9552 vm_object_unlock(object);
9553
9554 return;
9555 }
9556 #if MACH_PAGEMAP
9557 if (object->existence_map) {
9558 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) {
9559
9560 extended->pages_swapped_out++;
9561
9562 if(object != caller_object)
9563 vm_object_unlock(object);
9564
9565 return;
9566 }
9567 } else if (object->internal &&
9568 object->alive &&
9569 !object->terminating &&
9570 object->pager_ready) {
9571
9572 memory_object_t pager;
9573
9574 vm_object_paging_begin(object);
9575 pager = object->pager;
9576 vm_object_unlock(object);
9577
9578 kr = memory_object_data_request(
9579 pager,
9580 offset + object->paging_offset,
9581 0, /* just poke the pager */
9582 VM_PROT_READ,
9583 NULL);
9584
9585 vm_object_lock(object);
9586 vm_object_paging_end(object);
9587
9588 if (kr == KERN_SUCCESS) {
9589 /* the pager has that page */
9590 extended->pages_swapped_out++;
9591 if (object != caller_object)
9592 vm_object_unlock(object);
9593 return;
9594 }
9595 }
9596 #endif /* MACH_PAGEMAP */
9597
9598 if (shadow) {
9599 vm_object_lock(shadow);
9600
9601 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
9602 ref_count--;
9603
9604 if (++depth > extended->shadow_depth)
9605 extended->shadow_depth = depth;
9606
9607 if (ref_count > max_refcnt)
9608 max_refcnt = ref_count;
9609
9610 if(object != caller_object)
9611 vm_object_unlock(object);
9612
9613 offset = offset + object->shadow_offset;
9614 object = shadow;
9615 shadow = object->shadow;
9616 continue;
9617 }
9618 if(object != caller_object)
9619 vm_object_unlock(object);
9620 break;
9621 }
9622 }
9623
9624 static int
9625 vm_map_region_count_obj_refs(
9626 vm_map_entry_t entry,
9627 vm_object_t object)
9628 {
9629 register int ref_count;
9630 register vm_object_t chk_obj;
9631 register vm_object_t tmp_obj;
9632
9633 if (entry->object.vm_object == 0)
9634 return(0);
9635
9636 if (entry->is_sub_map)
9637 return(0);
9638 else {
9639 ref_count = 0;
9640
9641 chk_obj = entry->object.vm_object;
9642 vm_object_lock(chk_obj);
9643
9644 while (chk_obj) {
9645 if (chk_obj == object)
9646 ref_count++;
9647 tmp_obj = chk_obj->shadow;
9648 if (tmp_obj)
9649 vm_object_lock(tmp_obj);
9650 vm_object_unlock(chk_obj);
9651
9652 chk_obj = tmp_obj;
9653 }
9654 }
9655 return(ref_count);
9656 }
9657
9658
9659 /*
9660 * Routine: vm_map_simplify
9661 *
9662 * Description:
9663 * Attempt to simplify the map representation in
9664 * the vicinity of the given starting address.
9665 * Note:
9666 * This routine is intended primarily to keep the
9667 * kernel maps more compact -- they generally don't
9668 * benefit from the "expand a map entry" technology
9669 * at allocation time because the adjacent entry
9670 * is often wired down.
9671 */
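/*
 * Editor's example (not part of the original source): two adjacent
 * entries such as [0x1000, 0x2000) at object offset 0 and
 * [0x2000, 0x3000) at object offset 0x1000, backed by the same object
 * with identical protections, inheritance and other attributes, are
 * merged by vm_map_simplify_entry() into a single [0x1000, 0x3000)
 * entry covering the whole range.
 */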
9672 void
9673 vm_map_simplify_entry(
9674 vm_map_t map,
9675 vm_map_entry_t this_entry)
9676 {
9677 vm_map_entry_t prev_entry;
9678
9679 counter(c_vm_map_simplify_entry_called++);
9680
9681 prev_entry = this_entry->vme_prev;
9682
9683 if ((this_entry != vm_map_to_entry(map)) &&
9684 (prev_entry != vm_map_to_entry(map)) &&
9685
9686 (prev_entry->vme_end == this_entry->vme_start) &&
9687
9688 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
9689
9690 (prev_entry->object.vm_object == this_entry->object.vm_object) &&
9691 ((prev_entry->offset + (prev_entry->vme_end -
9692 prev_entry->vme_start))
9693 == this_entry->offset) &&
9694
9695 (prev_entry->inheritance == this_entry->inheritance) &&
9696 (prev_entry->protection == this_entry->protection) &&
9697 (prev_entry->max_protection == this_entry->max_protection) &&
9698 (prev_entry->behavior == this_entry->behavior) &&
9699 (prev_entry->alias == this_entry->alias) &&
9700 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
9701 (prev_entry->no_cache == this_entry->no_cache) &&
9702 (prev_entry->wired_count == this_entry->wired_count) &&
9703 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
9704
9705 (prev_entry->needs_copy == this_entry->needs_copy) &&
9706 (prev_entry->permanent == this_entry->permanent) &&
9707
9708 (prev_entry->use_pmap == FALSE) &&
9709 (this_entry->use_pmap == FALSE) &&
9710 (prev_entry->in_transition == FALSE) &&
9711 (this_entry->in_transition == FALSE) &&
9712 (prev_entry->needs_wakeup == FALSE) &&
9713 (this_entry->needs_wakeup == FALSE) &&
9714 (prev_entry->is_shared == FALSE) &&
9715 (this_entry->is_shared == FALSE)
9716 ) {
9717 _vm_map_entry_unlink(&map->hdr, prev_entry);
9718 this_entry->vme_start = prev_entry->vme_start;
9719 this_entry->offset = prev_entry->offset;
9720 if (prev_entry->is_sub_map) {
9721 vm_map_deallocate(prev_entry->object.sub_map);
9722 } else {
9723 vm_object_deallocate(prev_entry->object.vm_object);
9724 }
9725 vm_map_entry_dispose(map, prev_entry);
9726 SAVE_HINT_MAP_WRITE(map, this_entry);
9727 counter(c_vm_map_simplified++);
9728 }
9729 }
9730
9731 void
9732 vm_map_simplify(
9733 vm_map_t map,
9734 vm_map_offset_t start)
9735 {
9736 vm_map_entry_t this_entry;
9737
9738 vm_map_lock(map);
9739 if (vm_map_lookup_entry(map, start, &this_entry)) {
9740 vm_map_simplify_entry(map, this_entry);
9741 vm_map_simplify_entry(map, this_entry->vme_next);
9742 }
9743 counter(c_vm_map_simplify_called++);
9744 vm_map_unlock(map);
9745 }
9746
9747 static void
9748 vm_map_simplify_range(
9749 vm_map_t map,
9750 vm_map_offset_t start,
9751 vm_map_offset_t end)
9752 {
9753 vm_map_entry_t entry;
9754
9755 /*
9756 * The map should be locked (for "write") by the caller.
9757 */
9758
9759 if (start >= end) {
9760 /* invalid address range */
9761 return;
9762 }
9763
9764 start = vm_map_trunc_page(start);
9765 end = vm_map_round_page(end);
9766
9767 if (!vm_map_lookup_entry(map, start, &entry)) {
9768 /* "start" is not mapped and "entry" ends before "start" */
9769 if (entry == vm_map_to_entry(map)) {
9770 /* start with first entry in the map */
9771 entry = vm_map_first_entry(map);
9772 } else {
9773 /* start with next entry */
9774 entry = entry->vme_next;
9775 }
9776 }
9777
9778 while (entry != vm_map_to_entry(map) &&
9779 entry->vme_start <= end) {
9780 /* try and coalesce "entry" with its previous entry */
9781 vm_map_simplify_entry(map, entry);
9782 entry = entry->vme_next;
9783 }
9784 }
9785
9786
9787 /*
9788 * Routine: vm_map_machine_attribute
9789 * Purpose:
9790 * Provide machine-specific attributes to mappings,
9791 * such as cacheability, etc., for machines that provide
9792 * them. NUMA architectures and machines with big/strange
9793 * caches will use this.
9794 * Note:
9795 * Responsibilities for locking and checking are handled here;
9796 * everything else is in the pmap module. If any non-volatile
9797 * information must be kept, the pmap module should handle
9798 * it itself. [This assumes that attributes do not
9799 * need to be inherited, which seems ok to me]
9800 */
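/*
 * Editor's sketch (not part of the original source; the exact user
 * interface shown is an assumption): this is reached from the Mach
 * vm_machine_attribute() call, e.g. to flush the data cache over a
 * range after generating code ("addr" and "size" are placeholders):
 *
 *	vm_machine_attribute_val_t val = MATTR_VAL_CACHE_FLUSH;
 *
 *	kr = vm_machine_attribute(mach_task_self(),
 *				  (vm_address_t)addr, size,
 *				  MATTR_CACHE, &val);
 */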
9801 kern_return_t
9802 vm_map_machine_attribute(
9803 vm_map_t map,
9804 vm_map_offset_t start,
9805 vm_map_offset_t end,
9806 vm_machine_attribute_t attribute,
9807 vm_machine_attribute_val_t* value) /* IN/OUT */
9808 {
9809 kern_return_t ret;
9810 vm_map_size_t sync_size;
9811 vm_map_entry_t entry;
9812
9813 if (start < vm_map_min(map) || end > vm_map_max(map))
9814 return KERN_INVALID_ADDRESS;
9815
9816 /* Figure how much memory we need to flush (in page increments) */
9817 sync_size = end - start;
9818
9819 vm_map_lock(map);
9820
9821 if (attribute != MATTR_CACHE) {
9822 /* If we don't have to find physical addresses, we */
9823 /* don't have to do an explicit traversal here. */
9824 ret = pmap_attribute(map->pmap, start, end-start,
9825 attribute, value);
9826 vm_map_unlock(map);
9827 return ret;
9828 }
9829
9830 ret = KERN_SUCCESS; /* Assume it all worked */
9831
9832 while(sync_size) {
9833 if (vm_map_lookup_entry(map, start, &entry)) {
9834 vm_map_size_t sub_size;
9835 if((entry->vme_end - start) > sync_size) {
9836 sub_size = sync_size;
9837 sync_size = 0;
9838 } else {
9839 sub_size = entry->vme_end - start;
9840 sync_size -= sub_size;
9841 }
9842 if(entry->is_sub_map) {
9843 vm_map_offset_t sub_start;
9844 vm_map_offset_t sub_end;
9845
9846 sub_start = (start - entry->vme_start)
9847 + entry->offset;
9848 sub_end = sub_start + sub_size;
9849 vm_map_machine_attribute(
9850 entry->object.sub_map,
9851 sub_start,
9852 sub_end,
9853 attribute, value);
9854 } else {
9855 if(entry->object.vm_object) {
9856 vm_page_t m;
9857 vm_object_t object;
9858 vm_object_t base_object;
9859 vm_object_t last_object;
9860 vm_object_offset_t offset;
9861 vm_object_offset_t base_offset;
9862 vm_map_size_t range;
9863 range = sub_size;
9864 offset = (start - entry->vme_start)
9865 + entry->offset;
9866 base_offset = offset;
9867 object = entry->object.vm_object;
9868 base_object = object;
9869 last_object = NULL;
9870
9871 vm_object_lock(object);
9872
9873 while (range) {
9874 m = vm_page_lookup(
9875 object, offset);
9876
9877 if (m && !m->fictitious) {
9878 ret =
9879 pmap_attribute_cache_sync(
9880 m->phys_page,
9881 PAGE_SIZE,
9882 attribute, value);
9883
9884 } else if (object->shadow) {
9885 offset = offset + object->shadow_offset;
9886 last_object = object;
9887 object = object->shadow;
9888 vm_object_lock(last_object->shadow);
9889 vm_object_unlock(last_object);
9890 continue;
9891 }
9892 range -= PAGE_SIZE;
9893
9894 if (base_object != object) {
9895 vm_object_unlock(object);
9896 vm_object_lock(base_object);
9897 object = base_object;
9898 }
9899 /* Bump to the next page */
9900 base_offset += PAGE_SIZE;
9901 offset = base_offset;
9902 }
9903 vm_object_unlock(object);
9904 }
9905 }
9906 start += sub_size;
9907 } else {
9908 vm_map_unlock(map);
9909 return KERN_FAILURE;
9910 }
9911
9912 }
9913
9914 vm_map_unlock(map);
9915
9916 return ret;
9917 }
9918
9919 /*
9920 * vm_map_behavior_set:
9921 *
9922 * Sets the paging reference behavior of the specified address
9923 * range in the target map. Paging reference behavior affects
9924 * how pagein operations resulting from faults on the map will be
9925 * clustered.
9926 */
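/*
 * Editor's note (not part of the original source): this is the kernel
 * side of madvise(2) and the Mach vm_behavior_set() interface; roughly,
 * MADV_SEQUENTIAL maps to VM_BEHAVIOR_SEQUENTIAL, MADV_WILLNEED to
 * VM_BEHAVIOR_WILLNEED, MADV_DONTNEED to VM_BEHAVIOR_DONTNEED and
 * MADV_FREE to VM_BEHAVIOR_FREE below.
 */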
9927 kern_return_t
9928 vm_map_behavior_set(
9929 vm_map_t map,
9930 vm_map_offset_t start,
9931 vm_map_offset_t end,
9932 vm_behavior_t new_behavior)
9933 {
9934 register vm_map_entry_t entry;
9935 vm_map_entry_t temp_entry;
9936
9937 XPR(XPR_VM_MAP,
9938 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
9939 map, start, end, new_behavior, 0);
9940
9941 switch (new_behavior) {
9942
9943 /*
9944 * This first block of behaviors all set a persistent state on the specified
9945 * memory range. All we have to do here is to record the desired behavior
9946 * in the vm_map_entry_t's.
9947 */
9948
9949 case VM_BEHAVIOR_DEFAULT:
9950 case VM_BEHAVIOR_RANDOM:
9951 case VM_BEHAVIOR_SEQUENTIAL:
9952 case VM_BEHAVIOR_RSEQNTL:
9953 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
9954 vm_map_lock(map);
9955
9956 /*
9957 * The entire address range must be valid for the map.
9958 * Note that vm_map_range_check() does a
9959 * vm_map_lookup_entry() internally and returns the
9960 * entry containing the start of the address range if
9961 * the entire range is valid.
9962 */
9963 if (vm_map_range_check(map, start, end, &temp_entry)) {
9964 entry = temp_entry;
9965 vm_map_clip_start(map, entry, start);
9966 }
9967 else {
9968 vm_map_unlock(map);
9969 return(KERN_INVALID_ADDRESS);
9970 }
9971
9972 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
9973 vm_map_clip_end(map, entry, end);
9974 assert(!entry->use_pmap);
9975
9976 if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
9977 entry->zero_wired_pages = TRUE;
9978 } else {
9979 entry->behavior = new_behavior;
9980 }
9981 entry = entry->vme_next;
9982 }
9983
9984 vm_map_unlock(map);
9985 break;
9986
9987 /*
9988 * The rest of these are different from the above in that they cause
9989 * an immediate action to take place as opposed to setting a behavior that
9990 * affects future actions.
9991 */
9992
9993 case VM_BEHAVIOR_WILLNEED:
9994 return vm_map_willneed(map, start, end);
9995
9996 case VM_BEHAVIOR_DONTNEED:
9997 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
9998
9999 case VM_BEHAVIOR_FREE:
10000 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
10001
10002 case VM_BEHAVIOR_REUSABLE:
10003 return vm_map_reusable_pages(map, start, end);
10004
10005 case VM_BEHAVIOR_REUSE:
10006 return vm_map_reuse_pages(map, start, end);
10007
10008 case VM_BEHAVIOR_CAN_REUSE:
10009 return vm_map_can_reuse(map, start, end);
10010
10011 default:
10012 return(KERN_INVALID_ARGUMENT);
10013 }
10014
10015 return(KERN_SUCCESS);
10016 }
10017
10018
10019 /*
10020 * Internals for madvise(MADV_WILLNEED) system call.
10021 *
10022 * The present implementation is to do a read-ahead if the mapping corresponds
10023 * to a mapped regular file. If it's an anonymous mapping, then we do nothing
10024 * and basically ignore the "advice" (which we are always free to do).
10025 */
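/*
 * Editor's illustration (a user-space sketch, not part of the original
 * source): the advice handled here originates from madvise(2), e.g.:
 *
 *	#include <sys/mman.h>
 *
 *	void *p = mmap(NULL, len, PROT_READ, MAP_FILE | MAP_PRIVATE,
 *		       fd, 0);
 *	if (p != MAP_FAILED)
 *		(void) madvise(p, len, MADV_WILLNEED);
 *
 * For a file-backed mapping like this one, the code below issues an
 * asynchronous read-ahead; for anonymous memory the advice is ignored.
 */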
10026
10027
10028 static kern_return_t
10029 vm_map_willneed(
10030 vm_map_t map,
10031 vm_map_offset_t start,
10032 vm_map_offset_t end
10033 )
10034 {
10035 vm_map_entry_t entry;
10036 vm_object_t object;
10037 memory_object_t pager;
10038 struct vm_object_fault_info fault_info;
10039 kern_return_t kr;
10040 vm_object_size_t len;
10041 vm_object_offset_t offset;
10042
10043 /*
10044 * Fill in static values in fault_info. Several fields get ignored by the code
10045 * we call, but we'll fill them in anyway since uninitialized fields are bad
10046 * when it comes to future backwards compatibility.
10047 */
10048
10049 fault_info.interruptible = THREAD_UNINT; /* ignored value */
10050 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
10051 fault_info.no_cache = FALSE; /* ignored value */
10052 fault_info.stealth = TRUE;
10053
10054 /*
10055 * The MADV_WILLNEED operation doesn't require any changes to the
10056 * vm_map_entry_t's, so the read lock is sufficient.
10057 */
10058
10059 vm_map_lock_read(map);
10060
10061 /*
10062 * The madvise semantics require that the address range be fully
10063 * allocated with no holes. Otherwise, we're required to return
10064 * an error.
10065 */
10066
10067 if (vm_map_range_check(map, start, end, &entry)) {
10068
10069 /*
10070 * Examine each vm_map_entry_t in the range.
10071 */
10072
10073 for (; entry->vme_start < end; start += len, entry = entry->vme_next) {
10074
10075 /*
10076 * The first time through, the start address could be anywhere within the
10077 * vm_map_entry we found. So adjust the offset to correspond. After that,
10078 * the offset will always be zero to correspond to the beginning of the current
10079 * vm_map_entry.
10080 */
10081
10082 offset = (start - entry->vme_start) + entry->offset;
10083
10084 /*
10085 * Set the length so we don't go beyond the end of the map_entry or beyond the
10086 * end of the range we were given. This range could also span multiple map
10087 * entries all of which map different files, so make sure we only do the right
10088 * amount of I/O for each object. Note that it's possible for there to be
10089 * multiple map entries all referring to the same object but with different
10090 * page permissions, but it's not worth trying to optimize that case.
10091 */
10092
10093 len = MIN(entry->vme_end - start, end - start);
10094
10095 if ((vm_size_t) len != len) {
10096 /* 32-bit overflow */
10097 len = (vm_size_t) (0 - PAGE_SIZE);
10098 }
10099 fault_info.cluster_size = (vm_size_t) len;
10100 fault_info.lo_offset = offset;
10101 fault_info.hi_offset = offset + len;
10102 fault_info.user_tag = entry->alias;
10103
10104 /*
10105 * If there's no read permission to this mapping, then just skip it.
10106 */
10107
10108 if ((entry->protection & VM_PROT_READ) == 0) {
10109 continue;
10110 }
10111
10112 /*
10113 * Find the file object backing this map entry. If there is none,
10114 * then we simply ignore the "will need" advice for this entry and
10115 * go on to the next one.
10116 */
10117
10118 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
10119 continue;
10120 }
10121
10122 vm_object_paging_begin(object);
10123 pager = object->pager;
10124 vm_object_unlock(object);
10125
10126 /*
10127 * Get the data from the object asynchronously.
10128 *
10129 * Note that memory_object_data_request() places limits on the amount
10130 * of I/O it will do. Regardless of the len we specified, it won't do
10131 * more than MAX_UPL_TRANSFER and it silently truncates the len to that
10132 * size. This isn't necessarily bad since madvise shouldn't really be
10133 * used to page in unlimited amounts of data. Other Unix variants limit
10134 * the willneed case as well. If this turns out to be an issue for
10135 * developers, then we can always adjust the policy here and still be
10136 * backwards compatible since this is all just "advice".
10137 */
10138
10139 kr = memory_object_data_request(
10140 pager,
10141 offset + object->paging_offset,
10142 0, /* ignored */
10143 VM_PROT_READ,
10144 (memory_object_fault_info_t)&fault_info);
10145
10146 vm_object_lock(object);
10147 vm_object_paging_end(object);
10148 vm_object_unlock(object);
10149
10150 /*
10151 * If we couldn't do the I/O for some reason, just give up on the
10152 * madvise. We still return success to the user since madvise isn't
10153 * supposed to fail when the advice can't be taken.
10154 */
10155
10156 if (kr != KERN_SUCCESS) {
10157 break;
10158 }
10159 }
10160
10161 kr = KERN_SUCCESS;
10162 } else
10163 kr = KERN_INVALID_ADDRESS;
10164
10165 vm_map_unlock_read(map);
10166 return kr;
10167 }
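
/*
 * Illustrative sketch (never compiled): the per-entry clamping arithmetic
 * used in the loop above, worked on made-up numbers. With a request of
 * [0x5000, 0xC000) against an entry covering [0x4000, 0x8000) whose
 * "offset" field is 0x1000, the first pass computes an object offset of
 * 0x2000 and an I/O length of 0x3000, i.e. it stops at the entry's end.
 */
#if 0 /* arithmetic sketch only, never compiled here */
static void
example_willneed_clamp(void)
{
	vm_map_offset_t		start = 0x5000, end = 0xC000;
	vm_map_offset_t		vme_start = 0x4000, vme_end = 0x8000;
	vm_object_offset_t	entry_offset = 0x1000;
	vm_object_offset_t	offset;
	vm_object_size_t	len;

	offset = (start - vme_start) + entry_offset;
	len = MIN(vme_end - start, end - start);

	assert(offset == 0x2000);	/* 0x1000 into the entry + 0x1000 */
	assert(len == 0x3000);		/* clipped at the entry's end */
}
#endif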
10168
10169 static boolean_t
10170 vm_map_entry_is_reusable(
10171 vm_map_entry_t entry)
10172 {
10173 vm_object_t object;
10174
10175 if (entry->is_shared ||
10176 entry->is_sub_map ||
10177 entry->in_transition ||
10178 entry->protection != VM_PROT_DEFAULT ||
10179 entry->max_protection != VM_PROT_ALL ||
10180 entry->inheritance != VM_INHERIT_DEFAULT ||
10181 entry->no_cache ||
10182 entry->permanent ||
10183 entry->superpage_size != 0 ||
10184 entry->zero_wired_pages ||
10185 entry->wired_count != 0 ||
10186 entry->user_wired_count != 0) {
10187 return FALSE;
10188 }
10189
10190 object = entry->object.vm_object;
10191 if (object == VM_OBJECT_NULL) {
10192 return TRUE;
10193 }
10194 if (object->ref_count == 1 &&
10195 object->wired_page_count == 0 &&
10196 object->copy == VM_OBJECT_NULL &&
10197 object->shadow == VM_OBJECT_NULL &&
10198 object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
10199 object->internal &&
10200 !object->true_share &&
10201 object->wimg_bits == VM_WIMG_DEFAULT &&
10202 !object->code_signed) {
10203 return TRUE;
10204 }
10205 return FALSE;
10206
10207
10208 }
10209
10210 static kern_return_t
10211 vm_map_reuse_pages(
10212 vm_map_t map,
10213 vm_map_offset_t start,
10214 vm_map_offset_t end)
10215 {
10216 vm_map_entry_t entry;
10217 vm_object_t object;
10218 vm_object_offset_t start_offset, end_offset;
10219
10220 /*
10221 * The MADV_REUSE operation doesn't require any changes to the
10222 * vm_map_entry_t's, so the read lock is sufficient.
10223 */
10224
10225 vm_map_lock_read(map);
10226
10227 /*
10228 * The madvise semantics require that the address range be fully
10229 * allocated with no holes. Otherwise, we're required to return
10230 * an error.
10231 */
10232
10233 if (!vm_map_range_check(map, start, end, &entry)) {
10234 vm_map_unlock_read(map);
10235 vm_page_stats_reusable.reuse_pages_failure++;
10236 return KERN_INVALID_ADDRESS;
10237 }
10238
10239 /*
10240 * Examine each vm_map_entry_t in the range.
10241 */
10242 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10243 entry = entry->vme_next) {
10244 /*
10245 * Sanity check on the VM map entry.
10246 */
10247 if (! vm_map_entry_is_reusable(entry)) {
10248 vm_map_unlock_read(map);
10249 vm_page_stats_reusable.reuse_pages_failure++;
10250 return KERN_INVALID_ADDRESS;
10251 }
10252
10253 /*
10254 * The first time through, the start address could be anywhere
10255 * within the vm_map_entry we found. So adjust the offset to
10256 * correspond.
10257 */
10258 if (entry->vme_start < start) {
10259 start_offset = start - entry->vme_start;
10260 } else {
10261 start_offset = 0;
10262 }
10263 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
10264 start_offset += entry->offset;
10265 end_offset += entry->offset;
10266
10267 object = entry->object.vm_object;
10268 if (object != VM_OBJECT_NULL) {
10269 vm_object_lock(object);
10270 vm_object_reuse_pages(object, start_offset, end_offset,
10271 TRUE);
10272 vm_object_unlock(object);
10273 }
10274
10275 if (entry->alias == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
10276 /*
10277 * XXX
10278 * We do not hold the VM map exclusively here.
10279 * The "alias" field is not that critical, so it's
10280 * safe to update it here, as long as it is the only
10281 * one that can be modified while holding the VM map
10282 * "shared".
10283 */
10284 entry->alias = VM_MEMORY_MALLOC_LARGE_REUSED;
10285 }
10286 }
10287
10288 vm_map_unlock_read(map);
10289 vm_page_stats_reusable.reuse_pages_success++;
10290 return KERN_SUCCESS;
10291 }
10292
10293
10294 static kern_return_t
10295 vm_map_reusable_pages(
10296 vm_map_t map,
10297 vm_map_offset_t start,
10298 vm_map_offset_t end)
10299 {
10300 vm_map_entry_t entry;
10301 vm_object_t object;
10302 vm_object_offset_t start_offset, end_offset;
10303
10304 /*
10305 * The MADV_REUSABLE operation doesn't require any changes to the
10306 * vm_map_entry_t's, so the read lock is sufficient.
10307 */
10308
10309 vm_map_lock_read(map);
10310
10311 /*
10312 * The madvise semantics require that the address range be fully
10313 * allocated with no holes. Otherwise, we're required to return
10314 * an error.
10315 */
10316
10317 if (!vm_map_range_check(map, start, end, &entry)) {
10318 vm_map_unlock_read(map);
10319 vm_page_stats_reusable.reusable_pages_failure++;
10320 return KERN_INVALID_ADDRESS;
10321 }
10322
10323 /*
10324 * Examine each vm_map_entry_t in the range.
10325 */
10326 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10327 entry = entry->vme_next) {
10328 int kill_pages = 0;
10329
10330 /*
10331 * Sanity check on the VM map entry.
10332 */
10333 if (! vm_map_entry_is_reusable(entry)) {
10334 vm_map_unlock_read(map);
10335 vm_page_stats_reusable.reusable_pages_failure++;
10336 return KERN_INVALID_ADDRESS;
10337 }
10338
10339 /*
10340 * The first time through, the start address could be anywhere
10341 * within the vm_map_entry we found. So adjust the offset to
10342 * correspond.
10343 */
10344 if (entry->vme_start < start) {
10345 start_offset = start - entry->vme_start;
10346 } else {
10347 start_offset = 0;
10348 }
10349 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
10350 start_offset += entry->offset;
10351 end_offset += entry->offset;
10352
10353 object = entry->object.vm_object;
10354 if (object == VM_OBJECT_NULL)
10355 continue;
10356
10357
10358 vm_object_lock(object);
10359 if (object->ref_count == 1 && !object->shadow)
10360 kill_pages = 1;
10361 else
10362 kill_pages = -1;
10363 if (kill_pages != -1) {
10364 vm_object_deactivate_pages(object,
10365 start_offset,
10366 end_offset - start_offset,
10367 kill_pages,
10368 TRUE /*reusable_pages*/);
10369 } else {
10370 vm_page_stats_reusable.reusable_pages_shared++;
10371 }
10372 vm_object_unlock(object);
10373
10374 if (entry->alias == VM_MEMORY_MALLOC_LARGE ||
10375 entry->alias == VM_MEMORY_MALLOC_LARGE_REUSED) {
10376 /*
10377 * XXX
10378 * We do not hold the VM map exclusively here.
10379 * The "alias" field is not that critical, so it's
10380 * safe to update it here, as long as it is the only
10381 * one that can be modified while holding the VM map
10382 * "shared".
10383 */
10384 entry->alias = VM_MEMORY_MALLOC_LARGE_REUSABLE;
10385 }
10386 }
10387
10388 vm_map_unlock_read(map);
10389 vm_page_stats_reusable.reusable_pages_success++;
10390 return KERN_SUCCESS;
10391 }
10392
10393
10394 static kern_return_t
10395 vm_map_can_reuse(
10396 vm_map_t map,
10397 vm_map_offset_t start,
10398 vm_map_offset_t end)
10399 {
10400 vm_map_entry_t entry;
10401
10402 /*
10403 * The MADV_REUSABLE operation doesn't require any changes to the
10404 * vm_map_entry_t's, so the read lock is sufficient.
10405 */
10406
10407 vm_map_lock_read(map);
10408
10409 /*
10410 * The madvise semantics require that the address range be fully
10411 * allocated with no holes. Otherwise, we're required to return
10412 * an error.
10413 */
10414
10415 if (!vm_map_range_check(map, start, end, &entry)) {
10416 vm_map_unlock_read(map);
10417 vm_page_stats_reusable.can_reuse_failure++;
10418 return KERN_INVALID_ADDRESS;
10419 }
10420
10421 /*
10422 * Examine each vm_map_entry_t in the range.
10423 */
10424 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10425 entry = entry->vme_next) {
10426 /*
10427 * Sanity check on the VM map entry.
10428 */
10429 if (! vm_map_entry_is_reusable(entry)) {
10430 vm_map_unlock_read(map);
10431 vm_page_stats_reusable.can_reuse_failure++;
10432 return KERN_INVALID_ADDRESS;
10433 }
10434 }
10435
10436 vm_map_unlock_read(map);
10437 vm_page_stats_reusable.can_reuse_success++;
10438 return KERN_SUCCESS;
10439 }
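
/*
 * Illustrative sketch (never compiled): the intended user-level protocol
 * behind vm_map_reusable_pages(), vm_map_reuse_pages() and
 * vm_map_can_reuse(), as a malloc-style allocator might use it. The
 * MADV_FREE_REUSABLE / MADV_FREE_REUSE advice values are assumed to be
 * the Darwin madvise(2) spellings that reach these routines; that
 * translation lives outside this file.
 */
#if 0 /* user-space example only, never compiled here */
#include <sys/mman.h>

static void
example_reusable_protocol(void *block, size_t len)
{
	/* the block is free: let the VM treat its pages as reusable */
	(void) madvise(block, len, MADV_FREE_REUSABLE);

	/* ... later, just before handing the block out again ... */
	(void) madvise(block, len, MADV_FREE_REUSE);
}
#endif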
10440
10441
10442
10443 #include <mach_kdb.h>
10444 #if MACH_KDB
10445 #include <ddb/db_output.h>
10446 #include <vm/vm_print.h>
10447
10448 #define printf db_printf
10449
10450 /*
10451 * Forward declarations for internal functions.
10452 */
10453 extern void vm_map_links_print(
10454 struct vm_map_links *links);
10455
10456 extern void vm_map_header_print(
10457 struct vm_map_header *header);
10458
10459 extern void vm_map_entry_print(
10460 vm_map_entry_t entry);
10461
10462 extern void vm_follow_entry(
10463 vm_map_entry_t entry);
10464
10465 extern void vm_follow_map(
10466 vm_map_t map);
10467
10468 /*
10469 * vm_map_links_print: [ debug ]
10470 */
10471 void
10472 vm_map_links_print(
10473 struct vm_map_links *links)
10474 {
10475 iprintf("prev = %08X next = %08X start = %016llX end = %016llX\n",
10476 links->prev,
10477 links->next,
10478 (unsigned long long)links->start,
10479 (unsigned long long)links->end);
10480 }
10481
10482 /*
10483 * vm_map_header_print: [ debug ]
10484 */
10485 void
10486 vm_map_header_print(
10487 struct vm_map_header *header)
10488 {
10489 vm_map_links_print(&header->links);
10490 iprintf("nentries = %08X, %sentries_pageable\n",
10491 header->nentries,
10492 (header->entries_pageable ? "" : "!"));
10493 }
10494
10495 /*
10496 * vm_follow_entry: [ debug ]
10497 */
10498 void
10499 vm_follow_entry(
10500 vm_map_entry_t entry)
10501 {
10502 int shadows;
10503
10504 iprintf("map entry %08X\n", entry);
10505
10506 db_indent += 2;
10507
10508 shadows = vm_follow_object(entry->object.vm_object);
10509 iprintf("Total objects : %d\n",shadows);
10510
10511 db_indent -= 2;
10512 }
10513
10514 /*
10515 * vm_map_entry_print: [ debug ]
10516 */
10517 void
10518 vm_map_entry_print(
10519 register vm_map_entry_t entry)
10520 {
10521 static const char *inheritance_name[4] =
10522 { "share", "copy", "none", "?"};
10523 static const char *behavior_name[4] =
10524 { "dflt", "rand", "seqtl", "rseqntl" };
10525
10526 iprintf("map entry %08X - prev = %08X next = %08X\n", entry, entry->vme_prev, entry->vme_next);
10527
10528 db_indent += 2;
10529
10530 vm_map_links_print(&entry->links);
10531
10532 iprintf("start = %016llX end = %016llX - prot=%x/%x/%s\n",
10533 (unsigned long long)entry->vme_start,
10534 (unsigned long long)entry->vme_end,
10535 entry->protection,
10536 entry->max_protection,
10537 inheritance_name[(entry->inheritance & 0x3)]);
10538
10539 iprintf("behavior = %s, wired_count = %d, user_wired_count = %d\n",
10540 behavior_name[(entry->behavior & 0x3)],
10541 entry->wired_count,
10542 entry->user_wired_count);
10543 iprintf("%sin_transition, %sneeds_wakeup\n",
10544 (entry->in_transition ? "" : "!"),
10545 (entry->needs_wakeup ? "" : "!"));
10546
10547 if (entry->is_sub_map) {
10548 iprintf("submap = %08X - offset = %016llX\n",
10549 entry->object.sub_map,
10550 (unsigned long long)entry->offset);
10551 } else {
10552 iprintf("object = %08X offset = %016llX - ",
10553 entry->object.vm_object,
10554 (unsigned long long)entry->offset);
10555 printf("%sis_shared, %sneeds_copy\n",
10556 (entry->is_shared ? "" : "!"),
10557 (entry->needs_copy ? "" : "!"));
10558 }
10559
10560 db_indent -= 2;
10561 }
10562
10563 /*
10564 * vm_follow_map: [ debug ]
10565 */
10566 void
10567 vm_follow_map(
10568 vm_map_t map)
10569 {
10570 register vm_map_entry_t entry;
10571
10572 iprintf("task map %08X\n", map);
10573
10574 db_indent += 2;
10575
10576 for (entry = vm_map_first_entry(map);
10577 entry && entry != vm_map_to_entry(map);
10578 entry = entry->vme_next) {
10579 vm_follow_entry(entry);
10580 }
10581
10582 db_indent -= 2;
10583 }
10584
10585 /*
10586 * vm_map_print: [ debug ]
10587 */
10588 void
10589 vm_map_print(
10590 db_addr_t inmap)
10591 {
10592 register vm_map_entry_t entry;
10593 vm_map_t map;
10594 #if TASK_SWAPPER
10595 char *swstate;
10596 #endif /* TASK_SWAPPER */
10597
10598 map = (vm_map_t)(long)
10599 inmap; /* Make sure we have the right type */
10600
10601 iprintf("task map %08X\n", map);
10602
10603 db_indent += 2;
10604
10605 vm_map_header_print(&map->hdr);
10606
10607 iprintf("pmap = %08X size = %08X ref = %d hint = %08X first_free = %08X\n",
10608 map->pmap,
10609 map->size,
10610 map->ref_count,
10611 map->hint,
10612 map->first_free);
10613
10614 iprintf("%swait_for_space, %swiring_required, timestamp = %d\n",
10615 (map->wait_for_space ? "" : "!"),
10616 (map->wiring_required ? "" : "!"),
10617 map->timestamp);
10618
10619 #if TASK_SWAPPER
10620 switch (map->sw_state) {
10621 case MAP_SW_IN:
10622 swstate = "SW_IN";
10623 break;
10624 case MAP_SW_OUT:
10625 swstate = "SW_OUT";
10626 break;
10627 default:
10628 swstate = "????";
10629 break;
10630 }
10631 iprintf("res = %d, sw_state = %s\n", map->res_count, swstate);
10632 #endif /* TASK_SWAPPER */
10633
10634 for (entry = vm_map_first_entry(map);
10635 entry && entry != vm_map_to_entry(map);
10636 entry = entry->vme_next) {
10637 vm_map_entry_print(entry);
10638 }
10639
10640 db_indent -= 2;
10641 }
10642
10643 /*
10644 * Routine: vm_map_copy_print
10645 * Purpose:
10646 * Pretty-print a copy object for ddb.
10647 */
10648
10649 void
10650 vm_map_copy_print(
10651 db_addr_t incopy)
10652 {
10653 vm_map_copy_t copy;
10654 vm_map_entry_t entry;
10655
10656 copy = (vm_map_copy_t)(long)
10657 incopy; /* Make sure we have the right type */
10658
10659 printf("copy object 0x%x\n", copy);
10660
10661 db_indent += 2;
10662
10663 iprintf("type=%d", copy->type);
10664 switch (copy->type) {
10665 case VM_MAP_COPY_ENTRY_LIST:
10666 printf("[entry_list]");
10667 break;
10668
10669 case VM_MAP_COPY_OBJECT:
10670 printf("[object]");
10671 break;
10672
10673 case VM_MAP_COPY_KERNEL_BUFFER:
10674 printf("[kernel_buffer]");
10675 break;
10676
10677 default:
10678 printf("[bad type]");
10679 break;
10680 }
10681 printf(", offset=0x%llx", (unsigned long long)copy->offset);
10682 printf(", size=0x%x\n", copy->size);
10683
10684 switch (copy->type) {
10685 case VM_MAP_COPY_ENTRY_LIST:
10686 vm_map_header_print(&copy->cpy_hdr);
10687 for (entry = vm_map_copy_first_entry(copy);
10688 entry && entry != vm_map_copy_to_entry(copy);
10689 entry = entry->vme_next) {
10690 vm_map_entry_print(entry);
10691 }
10692 break;
10693
10694 case VM_MAP_COPY_OBJECT:
10695 iprintf("object=0x%x\n", copy->cpy_object);
10696 break;
10697
10698 case VM_MAP_COPY_KERNEL_BUFFER:
10699 iprintf("kernel buffer=0x%x", copy->cpy_kdata);
10700 printf(", kalloc_size=0x%x\n", copy->cpy_kalloc_size);
10701 break;
10702
10703 }
10704
10705 db_indent -=2;
10706 }
10707
10708 /*
10709 * db_vm_map_total_size(map) [ debug ]
10710 *
10711 * return the total virtual size (in bytes) of the map
10712 */
10713 vm_map_size_t
10714 db_vm_map_total_size(
10715 db_addr_t inmap)
10716 {
10717 vm_map_entry_t entry;
10718 vm_map_size_t total;
10719 vm_map_t map;
10720
10721 map = (vm_map_t)(long)
10722 inmap; /* Make sure we have the right type */
10723
10724 total = 0;
10725 for (entry = vm_map_first_entry(map);
10726 entry != vm_map_to_entry(map);
10727 entry = entry->vme_next) {
10728 total += entry->vme_end - entry->vme_start;
10729 }
10730
10731 return total;
10732 }
10733
10734 #endif /* MACH_KDB */
10735
10736 /*
10737 * Routine: vm_map_entry_insert
10738 *
10739 * Description: This routine inserts a new vm_map_entry into a locked map.
10740 */
10741 vm_map_entry_t
10742 vm_map_entry_insert(
10743 vm_map_t map,
10744 vm_map_entry_t insp_entry,
10745 vm_map_offset_t start,
10746 vm_map_offset_t end,
10747 vm_object_t object,
10748 vm_object_offset_t offset,
10749 boolean_t needs_copy,
10750 boolean_t is_shared,
10751 boolean_t in_transition,
10752 vm_prot_t cur_protection,
10753 vm_prot_t max_protection,
10754 vm_behavior_t behavior,
10755 vm_inherit_t inheritance,
10756 unsigned wired_count,
10757 boolean_t no_cache,
10758 boolean_t permanent,
10759 unsigned int superpage_size)
10760 {
10761 vm_map_entry_t new_entry;
10762
10763 assert(insp_entry != (vm_map_entry_t)0);
10764
10765 new_entry = vm_map_entry_create(map);
10766
10767 new_entry->vme_start = start;
10768 new_entry->vme_end = end;
10769 assert(page_aligned(new_entry->vme_start));
10770 assert(page_aligned(new_entry->vme_end));
10771
10772 new_entry->object.vm_object = object;
10773 new_entry->offset = offset;
10774 new_entry->is_shared = is_shared;
10775 new_entry->is_sub_map = FALSE;
10776 new_entry->needs_copy = needs_copy;
10777 new_entry->in_transition = in_transition;
10778 new_entry->needs_wakeup = FALSE;
10779 new_entry->inheritance = inheritance;
10780 new_entry->protection = cur_protection;
10781 new_entry->max_protection = max_protection;
10782 new_entry->behavior = behavior;
10783 new_entry->wired_count = wired_count;
10784 new_entry->user_wired_count = 0;
10785 new_entry->use_pmap = FALSE;
10786 new_entry->alias = 0;
10787 new_entry->zero_wired_pages = FALSE;
10788 new_entry->no_cache = no_cache;
10789 new_entry->permanent = permanent;
10790 new_entry->superpage_size = superpage_size;
10791
10792 /*
10793 * Insert the new entry into the list.
10794 */
10795
10796 vm_map_entry_link(map, insp_entry, new_entry);
10797 map->size += end - start;
10798
10799 /*
10800 * Update the free space hint and the lookup hint.
10801 */
10802
10803 SAVE_HINT_MAP_WRITE(map, new_entry);
10804 return new_entry;
10805 }
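
/*
 * Illustrative sketch (never compiled): a minimal caller of
 * vm_map_entry_insert(), adding an anonymous, unwired, default-behavior
 * entry after the entry preceding "start". The map must already be locked
 * for writing and [start, end) is assumed not to overlap any existing
 * entry; argument order follows the definition above.
 */
#if 0 /* sketch only, never compiled here */
static void
example_entry_insert(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	vm_map_entry_t	insp_entry;

	vm_map_lock(map);
	/* on a miss, insp_entry is the entry preceding "start" */
	(void) vm_map_lookup_entry(map, start, &insp_entry);
	(void) vm_map_entry_insert(map, insp_entry, start, end,
				   VM_OBJECT_NULL, 0,
				   FALSE,		/* needs_copy */
				   FALSE,		/* is_shared */
				   FALSE,		/* in_transition */
				   VM_PROT_DEFAULT, VM_PROT_ALL,
				   VM_BEHAVIOR_DEFAULT,
				   VM_INHERIT_DEFAULT,
				   0,			/* wired_count */
				   FALSE,		/* no_cache */
				   FALSE,		/* permanent */
				   0);			/* superpage_size */
	vm_map_unlock(map);
}
#endif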
10806
10807 /*
10808 * Routine: vm_map_remap_extract
10809 *
10810 * Description: This routine returns a vm_map_entry list extracted from a map.
10811 */
10812 static kern_return_t
10813 vm_map_remap_extract(
10814 vm_map_t map,
10815 vm_map_offset_t addr,
10816 vm_map_size_t size,
10817 boolean_t copy,
10818 struct vm_map_header *map_header,
10819 vm_prot_t *cur_protection,
10820 vm_prot_t *max_protection,
10821 /* What, no behavior? */
10822 vm_inherit_t inheritance,
10823 boolean_t pageable)
10824 {
10825 kern_return_t result;
10826 vm_map_size_t mapped_size;
10827 vm_map_size_t tmp_size;
10828 vm_map_entry_t src_entry; /* result of last map lookup */
10829 vm_map_entry_t new_entry;
10830 vm_object_offset_t offset;
10831 vm_map_offset_t map_address;
10832 vm_map_offset_t src_start; /* start of entry to map */
10833 vm_map_offset_t src_end; /* end of region to be mapped */
10834 vm_object_t object;
10835 vm_map_version_t version;
10836 boolean_t src_needs_copy;
10837 boolean_t new_entry_needs_copy;
10838
10839 assert(map != VM_MAP_NULL);
10840 assert(size != 0 && size == vm_map_round_page(size));
10841 assert(inheritance == VM_INHERIT_NONE ||
10842 inheritance == VM_INHERIT_COPY ||
10843 inheritance == VM_INHERIT_SHARE);
10844
10845 /*
10846 * Compute start and end of region.
10847 */
10848 src_start = vm_map_trunc_page(addr);
10849 src_end = vm_map_round_page(src_start + size);
10850
10851 /*
10852 * Initialize map_header.
10853 */
10854 map_header->links.next = (struct vm_map_entry *)&map_header->links;
10855 map_header->links.prev = (struct vm_map_entry *)&map_header->links;
10856 map_header->nentries = 0;
10857 map_header->entries_pageable = pageable;
10858
10859 *cur_protection = VM_PROT_ALL;
10860 *max_protection = VM_PROT_ALL;
10861
10862 map_address = 0;
10863 mapped_size = 0;
10864 result = KERN_SUCCESS;
10865
10866 /*
10867 * The specified source virtual space might correspond to
10868 * multiple map entries, need to loop on them.
10869 */
10870 vm_map_lock(map);
10871 while (mapped_size != size) {
10872 vm_map_size_t entry_size;
10873
10874 /*
10875 * Find the beginning of the region.
10876 */
10877 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
10878 result = KERN_INVALID_ADDRESS;
10879 break;
10880 }
10881
10882 if (src_start < src_entry->vme_start ||
10883 (mapped_size && src_start != src_entry->vme_start)) {
10884 result = KERN_INVALID_ADDRESS;
10885 break;
10886 }
10887
10888 tmp_size = size - mapped_size;
10889 if (src_end > src_entry->vme_end)
10890 tmp_size -= (src_end - src_entry->vme_end);
10891
10892 entry_size = (vm_map_size_t)(src_entry->vme_end -
10893 src_entry->vme_start);
10894
10895 if(src_entry->is_sub_map) {
10896 vm_map_reference(src_entry->object.sub_map);
10897 object = VM_OBJECT_NULL;
10898 } else {
10899 object = src_entry->object.vm_object;
10900
10901 if (object == VM_OBJECT_NULL) {
10902 object = vm_object_allocate(entry_size);
10903 src_entry->offset = 0;
10904 src_entry->object.vm_object = object;
10905 } else if (object->copy_strategy !=
10906 MEMORY_OBJECT_COPY_SYMMETRIC) {
10907 /*
10908 * We are already using an asymmetric
10909 * copy, and therefore we already have
10910 * the right object.
10911 */
10912 assert(!src_entry->needs_copy);
10913 } else if (src_entry->needs_copy || object->shadowed ||
10914 (object->internal && !object->true_share &&
10915 !src_entry->is_shared &&
10916 object->size > entry_size)) {
10917
10918 vm_object_shadow(&src_entry->object.vm_object,
10919 &src_entry->offset,
10920 entry_size);
10921
10922 if (!src_entry->needs_copy &&
10923 (src_entry->protection & VM_PROT_WRITE)) {
10924 vm_prot_t prot;
10925
10926 prot = src_entry->protection & ~VM_PROT_WRITE;
10927
10928 if (override_nx(map, src_entry->alias) && prot)
10929 prot |= VM_PROT_EXECUTE;
10930
10931 if(map->mapped) {
10932 vm_object_pmap_protect(
10933 src_entry->object.vm_object,
10934 src_entry->offset,
10935 entry_size,
10936 PMAP_NULL,
10937 src_entry->vme_start,
10938 prot);
10939 } else {
10940 pmap_protect(vm_map_pmap(map),
10941 src_entry->vme_start,
10942 src_entry->vme_end,
10943 prot);
10944 }
10945 }
10946
10947 object = src_entry->object.vm_object;
10948 src_entry->needs_copy = FALSE;
10949 }
10950
10951
10952 vm_object_lock(object);
10953 vm_object_reference_locked(object); /* object ref. for new entry */
10954 if (object->copy_strategy ==
10955 MEMORY_OBJECT_COPY_SYMMETRIC) {
10956 object->copy_strategy =
10957 MEMORY_OBJECT_COPY_DELAY;
10958 }
10959 vm_object_unlock(object);
10960 }
10961
10962 offset = src_entry->offset + (src_start - src_entry->vme_start);
10963
10964 new_entry = _vm_map_entry_create(map_header);
10965 vm_map_entry_copy(new_entry, src_entry);
10966 new_entry->use_pmap = FALSE; /* clr address space specifics */
10967
10968 new_entry->vme_start = map_address;
10969 new_entry->vme_end = map_address + tmp_size;
10970 new_entry->inheritance = inheritance;
10971 new_entry->offset = offset;
10972
10973 /*
10974 * The new region has to be copied now if required.
10975 */
10976 RestartCopy:
10977 if (!copy) {
10978 src_entry->is_shared = TRUE;
10979 new_entry->is_shared = TRUE;
10980 if (!(new_entry->is_sub_map))
10981 new_entry->needs_copy = FALSE;
10982
10983 } else if (src_entry->is_sub_map) {
10984 /* make this a COW sub_map if not already */
10985 new_entry->needs_copy = TRUE;
10986 object = VM_OBJECT_NULL;
10987 } else if (src_entry->wired_count == 0 &&
10988 vm_object_copy_quickly(&new_entry->object.vm_object,
10989 new_entry->offset,
10990 (new_entry->vme_end -
10991 new_entry->vme_start),
10992 &src_needs_copy,
10993 &new_entry_needs_copy)) {
10994
10995 new_entry->needs_copy = new_entry_needs_copy;
10996 new_entry->is_shared = FALSE;
10997
10998 /*
10999 * Handle copy_on_write semantics.
11000 */
11001 if (src_needs_copy && !src_entry->needs_copy) {
11002 vm_prot_t prot;
11003
11004 prot = src_entry->protection & ~VM_PROT_WRITE;
11005
11006 if (override_nx(map, src_entry->alias) && prot)
11007 prot |= VM_PROT_EXECUTE;
11008
11009 vm_object_pmap_protect(object,
11010 offset,
11011 entry_size,
11012 ((src_entry->is_shared
11013 || map->mapped) ?
11014 PMAP_NULL : map->pmap),
11015 src_entry->vme_start,
11016 prot);
11017
11018 src_entry->needs_copy = TRUE;
11019 }
11020 /*
11021 * Throw away the old object reference of the new entry.
11022 */
11023 vm_object_deallocate(object);
11024
11025 } else {
11026 new_entry->is_shared = FALSE;
11027
11028 /*
11029 * The map can be safely unlocked since we
11030 * already hold a reference on the object.
11031 *
11032 * Record the timestamp of the map for later
11033 * verification, and unlock the map.
11034 */
11035 version.main_timestamp = map->timestamp;
11036 vm_map_unlock(map); /* Increments timestamp once! */
11037
11038 /*
11039 * Perform the copy.
11040 */
11041 if (src_entry->wired_count > 0) {
11042 vm_object_lock(object);
11043 result = vm_object_copy_slowly(
11044 object,
11045 offset,
11046 entry_size,
11047 THREAD_UNINT,
11048 &new_entry->object.vm_object);
11049
11050 new_entry->offset = 0;
11051 new_entry->needs_copy = FALSE;
11052 } else {
11053 result = vm_object_copy_strategically(
11054 object,
11055 offset,
11056 entry_size,
11057 &new_entry->object.vm_object,
11058 &new_entry->offset,
11059 &new_entry_needs_copy);
11060
11061 new_entry->needs_copy = new_entry_needs_copy;
11062 }
11063
11064 /*
11065 * Throw away the old object reference of the new entry.
11066 */
11067 vm_object_deallocate(object);
11068
11069 if (result != KERN_SUCCESS &&
11070 result != KERN_MEMORY_RESTART_COPY) {
11071 _vm_map_entry_dispose(map_header, new_entry);
11072 break;
11073 }
11074
11075 /*
11076 * Verify that the map has not substantially
11077 * changed while the copy was being made.
11078 */
11079
11080 vm_map_lock(map);
11081 if (version.main_timestamp + 1 != map->timestamp) {
11082 /*
11083 * Simple version comparison failed.
11084 *
11085 * Retry the lookup and verify that the
11086 * same object/offset are still present.
11087 */
11088 vm_object_deallocate(new_entry->
11089 object.vm_object);
11090 _vm_map_entry_dispose(map_header, new_entry);
11091 if (result == KERN_MEMORY_RESTART_COPY)
11092 result = KERN_SUCCESS;
11093 continue;
11094 }
11095
11096 if (result == KERN_MEMORY_RESTART_COPY) {
11097 vm_object_reference(object);
11098 goto RestartCopy;
11099 }
11100 }
11101
11102 _vm_map_entry_link(map_header,
11103 map_header->links.prev, new_entry);
11104
11105 *cur_protection &= src_entry->protection;
11106 *max_protection &= src_entry->max_protection;
11107
11108 map_address += tmp_size;
11109 mapped_size += tmp_size;
11110 src_start += tmp_size;
11111
11112 } /* end while */
11113
11114 vm_map_unlock(map);
11115 if (result != KERN_SUCCESS) {
11116 /*
11117 * Free all allocated elements.
11118 */
11119 for (src_entry = map_header->links.next;
11120 src_entry != (struct vm_map_entry *)&map_header->links;
11121 src_entry = new_entry) {
11122 new_entry = src_entry->vme_next;
11123 _vm_map_entry_unlink(map_header, src_entry);
11124 vm_object_deallocate(src_entry->object.vm_object);
11125 _vm_map_entry_dispose(map_header, src_entry);
11126 }
11127 }
11128 return result;
11129 }
11130
11131 /*
11132 * Routine: vm_remap
11133 *
11134 * Map portion of a task's address space.
11135 * Mapped region must not overlap more than
11136 * one vm memory object. Protections and
11137 * inheritance attributes remain the same
11138 * as in the original task and are out parameters.
11139 * Source and target tasks can be identical.
11140 * Other attributes are the same as for vm_map().
11141 */
11142 kern_return_t
11143 vm_map_remap(
11144 vm_map_t target_map,
11145 vm_map_address_t *address,
11146 vm_map_size_t size,
11147 vm_map_offset_t mask,
11148 boolean_t anywhere,
11149 vm_map_t src_map,
11150 vm_map_offset_t memory_address,
11151 boolean_t copy,
11152 vm_prot_t *cur_protection,
11153 vm_prot_t *max_protection,
11154 vm_inherit_t inheritance)
11155 {
11156 kern_return_t result;
11157 vm_map_entry_t entry;
11158 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
11159 vm_map_entry_t new_entry;
11160 struct vm_map_header map_header;
11161
11162 if (target_map == VM_MAP_NULL)
11163 return KERN_INVALID_ARGUMENT;
11164
11165 switch (inheritance) {
11166 case VM_INHERIT_NONE:
11167 case VM_INHERIT_COPY:
11168 case VM_INHERIT_SHARE:
11169 if (size != 0 && src_map != VM_MAP_NULL)
11170 break;
11171 /*FALL THRU*/
11172 default:
11173 return KERN_INVALID_ARGUMENT;
11174 }
11175
11176 size = vm_map_round_page(size);
11177
11178 result = vm_map_remap_extract(src_map, memory_address,
11179 size, copy, &map_header,
11180 cur_protection,
11181 max_protection,
11182 inheritance,
11183 target_map->hdr.
11184 entries_pageable);
11185
11186 if (result != KERN_SUCCESS) {
11187 return result;
11188 }
11189
11190 /*
11191 * Allocate/check a range of free virtual address
11192 * space for the target
11193 */
11194 *address = vm_map_trunc_page(*address);
11195 vm_map_lock(target_map);
11196 result = vm_map_remap_range_allocate(target_map, address, size,
11197 mask, anywhere, &insp_entry);
11198
11199 for (entry = map_header.links.next;
11200 entry != (struct vm_map_entry *)&map_header.links;
11201 entry = new_entry) {
11202 new_entry = entry->vme_next;
11203 _vm_map_entry_unlink(&map_header, entry);
11204 if (result == KERN_SUCCESS) {
11205 entry->vme_start += *address;
11206 entry->vme_end += *address;
11207 vm_map_entry_link(target_map, insp_entry, entry);
11208 insp_entry = entry;
11209 } else {
11210 if (!entry->is_sub_map) {
11211 vm_object_deallocate(entry->object.vm_object);
11212 } else {
11213 vm_map_deallocate(entry->object.sub_map);
11214 }
11215 _vm_map_entry_dispose(&map_header, entry);
11216 }
11217 }
11218
11219 if (result == KERN_SUCCESS) {
11220 target_map->size += size;
11221 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
11222 }
11223 vm_map_unlock(target_map);
11224
11225 if (result == KERN_SUCCESS && target_map->wiring_required)
11226 result = vm_map_wire(target_map, *address,
11227 *address + size, *cur_protection, TRUE);
11228 return result;
11229 }
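
/*
 * Illustrative sketch (never compiled): sharing a page-aligned region of
 * src_map into target_map with vm_map_remap(), letting the kernel pick
 * the target address ("anywhere") and reporting the resulting protections
 * through the out parameters.
 */
#if 0 /* sketch only, never compiled here */
static kern_return_t
example_remap_share(
	vm_map_t		target_map,
	vm_map_t		src_map,
	vm_map_offset_t		src_addr,
	vm_map_size_t		size,
	vm_map_address_t	*out_addr)
{
	vm_prot_t	cur_prot, max_prot;

	*out_addr = 0;			/* no preferred address */
	return vm_map_remap(target_map, out_addr, size,
			    0,		/* mask: no alignment constraint */
			    TRUE,	/* anywhere */
			    src_map, src_addr,
			    FALSE,	/* copy: share, don't copy */
			    &cur_prot, &max_prot,
			    VM_INHERIT_NONE);
}
#endif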
11230
11231 /*
11232 * Routine: vm_map_remap_range_allocate
11233 *
11234 * Description:
11235 * Allocate a range in the specified virtual address map.
11236 * Returns the address and the map entry just before the allocated
11237 * range
11238 *
11239 * Map must be locked.
11240 */
11241
11242 static kern_return_t
11243 vm_map_remap_range_allocate(
11244 vm_map_t map,
11245 vm_map_address_t *address, /* IN/OUT */
11246 vm_map_size_t size,
11247 vm_map_offset_t mask,
11248 boolean_t anywhere,
11249 vm_map_entry_t *map_entry) /* OUT */
11250 {
11251 register vm_map_entry_t entry;
11252 register vm_map_offset_t start;
11253 register vm_map_offset_t end;
11254
11255 StartAgain: ;
11256
11257 start = *address;
11258
11259 if (anywhere)
11260 {
11261 /*
11262 * Calculate the first possible address.
11263 */
11264
11265 if (start < map->min_offset)
11266 start = map->min_offset;
11267 if (start > map->max_offset)
11268 return(KERN_NO_SPACE);
11269
11270 /*
11271 * Look for the first possible address;
11272 * if there's already something at this
11273 * address, we have to start after it.
11274 */
11275
11276 assert(first_free_is_valid(map));
11277 if (start == map->min_offset) {
11278 if ((entry = map->first_free) != vm_map_to_entry(map))
11279 start = entry->vme_end;
11280 } else {
11281 vm_map_entry_t tmp_entry;
11282 if (vm_map_lookup_entry(map, start, &tmp_entry))
11283 start = tmp_entry->vme_end;
11284 entry = tmp_entry;
11285 }
11286
11287 /*
11288 * In any case, the "entry" always precedes
11289 * the proposed new region throughout the
11290 * loop:
11291 */
11292
11293 while (TRUE) {
11294 register vm_map_entry_t next;
11295
11296 /*
11297 * Find the end of the proposed new region.
11298 * Be sure we didn't go beyond the end, or
11299 * wrap around the address.
11300 */
11301
11302 end = ((start + mask) & ~mask);
11303 if (end < start)
11304 return(KERN_NO_SPACE);
11305 start = end;
11306 end += size;
11307
11308 if ((end > map->max_offset) || (end < start)) {
11309 if (map->wait_for_space) {
11310 if (size <= (map->max_offset -
11311 map->min_offset)) {
11312 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
11313 vm_map_unlock(map);
11314 thread_block(THREAD_CONTINUE_NULL);
11315 vm_map_lock(map);
11316 goto StartAgain;
11317 }
11318 }
11319
11320 return(KERN_NO_SPACE);
11321 }
11322
11323 /*
11324 * If there are no more entries, we must win.
11325 */
11326
11327 next = entry->vme_next;
11328 if (next == vm_map_to_entry(map))
11329 break;
11330
11331 /*
11332 * If there is another entry, it must be
11333 * after the end of the potential new region.
11334 */
11335
11336 if (next->vme_start >= end)
11337 break;
11338
11339 /*
11340 * Didn't fit -- move to the next entry.
11341 */
11342
11343 entry = next;
11344 start = entry->vme_end;
11345 }
11346 *address = start;
11347 } else {
11348 vm_map_entry_t temp_entry;
11349
11350 /*
11351 * Verify that:
11352 * the address doesn't itself violate
11353 * the mask requirement.
11354 */
11355
11356 if ((start & mask) != 0)
11357 return(KERN_NO_SPACE);
11358
11359
11360 /*
11361 * ... the address is within bounds
11362 */
11363
11364 end = start + size;
11365
11366 if ((start < map->min_offset) ||
11367 (end > map->max_offset) ||
11368 (start >= end)) {
11369 return(KERN_INVALID_ADDRESS);
11370 }
11371
11372 /*
11373 * ... the starting address isn't allocated
11374 */
11375
11376 if (vm_map_lookup_entry(map, start, &temp_entry))
11377 return(KERN_NO_SPACE);
11378
11379 entry = temp_entry;
11380
11381 /*
11382 * ... the next region doesn't overlap the
11383 * end point.
11384 */
11385
11386 if ((entry->vme_next != vm_map_to_entry(map)) &&
11387 (entry->vme_next->vme_start < end))
11388 return(KERN_NO_SPACE);
11389 }
11390 *map_entry = entry;
11391 return(KERN_SUCCESS);
11392 }
11393
11394 /*
11395 * vm_map_switch:
11396 *
11397 * Set the address map for the current thread to the specified map
11398 */
11399
11400 vm_map_t
11401 vm_map_switch(
11402 vm_map_t map)
11403 {
11404 int mycpu;
11405 thread_t thread = current_thread();
11406 vm_map_t oldmap = thread->map;
11407
11408 mp_disable_preemption();
11409 mycpu = cpu_number();
11410
11411 /*
11412 * Deactivate the current map and activate the requested map
11413 */
11414 PMAP_SWITCH_USER(thread, map, mycpu);
11415
11416 mp_enable_preemption();
11417 return(oldmap);
11418 }
11419
11420
11421 /*
11422 * Routine: vm_map_write_user
11423 *
11424 * Description:
11425 * Copy out data from a kernel space into space in the
11426 * destination map. The space must already exist in the
11427 * destination map.
11428 * NOTE: This routine should only be called by threads
11429 * which can block on a page fault, i.e. kernel mode user
11430 * threads.
11431 *
11432 */
11433 kern_return_t
11434 vm_map_write_user(
11435 vm_map_t map,
11436 void *src_p,
11437 vm_map_address_t dst_addr,
11438 vm_size_t size)
11439 {
11440 kern_return_t kr = KERN_SUCCESS;
11441
11442 if(current_map() == map) {
11443 if (copyout(src_p, dst_addr, size)) {
11444 kr = KERN_INVALID_ADDRESS;
11445 }
11446 } else {
11447 vm_map_t oldmap;
11448
11449 /* take on the identity of the target map while doing */
11450 /* the transfer */
11451
11452 vm_map_reference(map);
11453 oldmap = vm_map_switch(map);
11454 if (copyout(src_p, dst_addr, size)) {
11455 kr = KERN_INVALID_ADDRESS;
11456 }
11457 vm_map_switch(oldmap);
11458 vm_map_deallocate(map);
11459 }
11460 return kr;
11461 }
11462
11463 /*
11464 * Routine: vm_map_read_user
11465 *
11466 * Description:
11467 * Copy in data from a user space source map into the
11468 * kernel map. The space must already exist in the
11469 * kernel map.
11470 * NOTE: This routine should only be called by threads
11471 * which can block on a page fault, i.e. kernel mode user
11472 * threads.
11473 *
11474 */
11475 kern_return_t
11476 vm_map_read_user(
11477 vm_map_t map,
11478 vm_map_address_t src_addr,
11479 void *dst_p,
11480 vm_size_t size)
11481 {
11482 kern_return_t kr = KERN_SUCCESS;
11483
11484 if(current_map() == map) {
11485 if (copyin(src_addr, dst_p, size)) {
11486 kr = KERN_INVALID_ADDRESS;
11487 }
11488 } else {
11489 vm_map_t oldmap;
11490
11491 /* take on the identity of the target map while doing */
11492 /* the transfer */
11493
11494 vm_map_reference(map);
11495 oldmap = vm_map_switch(map);
11496 if (copyin(src_addr, dst_p, size)) {
11497 kr = KERN_INVALID_ADDRESS;
11498 }
11499 vm_map_switch(oldmap);
11500 vm_map_deallocate(map);
11501 }
11502 return kr;
11503 }
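
/*
 * Illustrative sketch (never compiled): round-tripping a small kernel
 * value through another task's address space with the two helpers above.
 * "user_addr" is assumed to already be mapped readable and writable in
 * "user_map".
 */
#if 0 /* sketch only, never compiled here */
static kern_return_t
example_user_copy(
	vm_map_t		user_map,
	vm_map_address_t	user_addr)
{
	int		value = 42;
	int		check = 0;
	kern_return_t	kr;

	kr = vm_map_write_user(user_map, &value, user_addr, sizeof(value));
	if (kr != KERN_SUCCESS)
		return kr;

	kr = vm_map_read_user(user_map, user_addr, &check, sizeof(check));
	if (kr == KERN_SUCCESS)
		assert(check == value);
	return kr;
}
#endif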
11504
11505
11506 /*
11507 * vm_map_check_protection:
11508 *
11509 * Assert that the target map allows the specified
11510 * privilege on the entire address region given.
11511 * The entire region must be allocated.
11512 */
11513 boolean_t
11514 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
11515 vm_map_offset_t end, vm_prot_t protection)
11516 {
11517 vm_map_entry_t entry;
11518 vm_map_entry_t tmp_entry;
11519
11520 vm_map_lock(map);
11521
11522 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
11523 {
11524 vm_map_unlock(map);
11525 return (FALSE);
11526 }
11527
11528 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11529 vm_map_unlock(map);
11530 return(FALSE);
11531 }
11532
11533 entry = tmp_entry;
11534
11535 while (start < end) {
11536 if (entry == vm_map_to_entry(map)) {
11537 vm_map_unlock(map);
11538 return(FALSE);
11539 }
11540
11541 /*
11542 * No holes allowed!
11543 */
11544
11545 if (start < entry->vme_start) {
11546 vm_map_unlock(map);
11547 return(FALSE);
11548 }
11549
11550 /*
11551 * Check protection associated with entry.
11552 */
11553
11554 if ((entry->protection & protection) != protection) {
11555 vm_map_unlock(map);
11556 return(FALSE);
11557 }
11558
11559 /* go to next entry */
11560
11561 start = entry->vme_end;
11562 entry = entry->vme_next;
11563 }
11564 vm_map_unlock(map);
11565 return(TRUE);
11566 }
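
/*
 * Illustrative sketch (never compiled): using vm_map_check_protection()
 * to verify that an entire user range is mapped read/write, with no
 * holes, before committing to an operation on it.
 */
#if 0 /* sketch only, never compiled here */
static kern_return_t
example_check_rw(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	if (!vm_map_check_protection(map, start, end,
				     VM_PROT_READ | VM_PROT_WRITE))
		return KERN_PROTECTION_FAILURE;
	return KERN_SUCCESS;
}
#endif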
11567
11568 kern_return_t
11569 vm_map_purgable_control(
11570 vm_map_t map,
11571 vm_map_offset_t address,
11572 vm_purgable_t control,
11573 int *state)
11574 {
11575 vm_map_entry_t entry;
11576 vm_object_t object;
11577 kern_return_t kr;
11578
11579 /*
11580 * Vet all the input parameters and current type and state of the
11581 * underlying object. Return with an error if anything is amiss.
11582 */
11583 if (map == VM_MAP_NULL)
11584 return(KERN_INVALID_ARGUMENT);
11585
11586 if (control != VM_PURGABLE_SET_STATE &&
11587 control != VM_PURGABLE_GET_STATE &&
11588 control != VM_PURGABLE_PURGE_ALL)
11589 return(KERN_INVALID_ARGUMENT);
11590
11591 if (control == VM_PURGABLE_PURGE_ALL) {
11592 vm_purgeable_object_purge_all();
11593 return KERN_SUCCESS;
11594 }
11595
11596 if (control == VM_PURGABLE_SET_STATE &&
11597 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
11598 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
11599 return(KERN_INVALID_ARGUMENT);
11600
11601 vm_map_lock_read(map);
11602
11603 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
11604
11605 /*
11606 * Must pass a valid non-submap address.
11607 */
11608 vm_map_unlock_read(map);
11609 return(KERN_INVALID_ADDRESS);
11610 }
11611
11612 if ((entry->protection & VM_PROT_WRITE) == 0) {
11613 /*
11614 * Can't apply purgable controls to something you can't write.
11615 */
11616 vm_map_unlock_read(map);
11617 return(KERN_PROTECTION_FAILURE);
11618 }
11619
11620 object = entry->object.vm_object;
11621 if (object == VM_OBJECT_NULL) {
11622 /*
11623 * Object must already be present or it can't be purgable.
11624 */
11625 vm_map_unlock_read(map);
11626 return KERN_INVALID_ARGUMENT;
11627 }
11628
11629 vm_object_lock(object);
11630
11631 if (entry->offset != 0 ||
11632 entry->vme_end - entry->vme_start != object->size) {
11633 /*
11634 * Can only apply purgable controls to the whole (existing)
11635 * object at once.
11636 */
11637 vm_map_unlock_read(map);
11638 vm_object_unlock(object);
11639 return KERN_INVALID_ARGUMENT;
11640 }
11641
11642 vm_map_unlock_read(map);
11643
11644 kr = vm_object_purgable_control(object, control, state);
11645
11646 vm_object_unlock(object);
11647
11648 return kr;
11649 }
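
/*
 * Illustrative sketch (never compiled): cycling a purgeable region
 * between volatile and non-volatile around a period of disuse, then
 * checking whether it was purged in between. VM_PURGABLE_VOLATILE,
 * VM_PURGABLE_NONVOLATILE and VM_PURGABLE_EMPTY are assumed to be the
 * usual <mach/vm_purgable.h> state values, and the previous state is
 * assumed to be reported back through "state" on a SET_STATE call.
 */
#if 0 /* sketch only, never compiled here */
static boolean_t
example_purgeable_cycle(
	vm_map_t	map,
	vm_map_offset_t	addr)
{
	int	state;

	state = VM_PURGABLE_VOLATILE;
	(void) vm_map_purgable_control(map, addr,
				       VM_PURGABLE_SET_STATE, &state);

	/* ... memory pressure may purge the object here ... */

	state = VM_PURGABLE_NONVOLATILE;
	(void) vm_map_purgable_control(map, addr,
				       VM_PURGABLE_SET_STATE, &state);

	/* "state" now holds the prior state: was the object emptied? */
	return (state == VM_PURGABLE_EMPTY);
}
#endif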
11650
11651 kern_return_t
11652 vm_map_page_query_internal(
11653 vm_map_t target_map,
11654 vm_map_offset_t offset,
11655 int *disposition,
11656 int *ref_count)
11657 {
11658 kern_return_t kr;
11659 vm_page_info_basic_data_t info;
11660 mach_msg_type_number_t count;
11661
11662 count = VM_PAGE_INFO_BASIC_COUNT;
11663 kr = vm_map_page_info(target_map,
11664 offset,
11665 VM_PAGE_INFO_BASIC,
11666 (vm_page_info_t) &info,
11667 &count);
11668 if (kr == KERN_SUCCESS) {
11669 *disposition = info.disposition;
11670 *ref_count = info.ref_count;
11671 } else {
11672 *disposition = 0;
11673 *ref_count = 0;
11674 }
11675
11676 return kr;
11677 }
11678
11679 kern_return_t
11680 vm_map_page_info(
11681 vm_map_t map,
11682 vm_map_offset_t offset,
11683 vm_page_info_flavor_t flavor,
11684 vm_page_info_t info,
11685 mach_msg_type_number_t *count)
11686 {
11687 vm_map_entry_t map_entry;
11688 vm_object_t object;
11689 vm_page_t m;
11690 kern_return_t kr;
11691 kern_return_t retval = KERN_SUCCESS;
11692 boolean_t top_object;
11693 int disposition;
11694 int ref_count;
11695 vm_object_id_t object_id;
11696 vm_page_info_basic_t basic_info;
11697 int depth;
11698
11699 switch (flavor) {
11700 case VM_PAGE_INFO_BASIC:
11701 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
11702 return KERN_INVALID_ARGUMENT;
11703 }
11704 break;
11705 default:
11706 return KERN_INVALID_ARGUMENT;
11707 }
11708
11709 disposition = 0;
11710 ref_count = 0;
11711 object_id = 0;
11712 top_object = TRUE;
11713 depth = 0;
11714
11715 retval = KERN_SUCCESS;
11716 offset = vm_map_trunc_page(offset);
11717
11718 vm_map_lock_read(map);
11719
11720 /*
11721 * First, find the map entry covering "offset", going down
11722 * submaps if necessary.
11723 */
11724 for (;;) {
11725 if (!vm_map_lookup_entry(map, offset, &map_entry)) {
11726 vm_map_unlock_read(map);
11727 return KERN_INVALID_ADDRESS;
11728 }
11729 /* compute offset from this map entry's start */
11730 offset -= map_entry->vme_start;
11731 /* compute offset into this map entry's object (or submap) */
11732 offset += map_entry->offset;
11733
11734 if (map_entry->is_sub_map) {
11735 vm_map_t sub_map;
11736
11737 sub_map = map_entry->object.sub_map;
11738 vm_map_lock_read(sub_map);
11739 vm_map_unlock_read(map);
11740
11741 map = sub_map;
11742
11743 ref_count = MAX(ref_count, map->ref_count);
11744 continue;
11745 }
11746 break;
11747 }
11748
11749 object = map_entry->object.vm_object;
11750 if (object == VM_OBJECT_NULL) {
11751 /* no object -> no page */
11752 vm_map_unlock_read(map);
11753 goto done;
11754 }
11755
11756 vm_object_lock(object);
11757 vm_map_unlock_read(map);
11758
11759 /*
11760 * Go down the VM object shadow chain until we find the page
11761 * we're looking for.
11762 */
11763 for (;;) {
11764 ref_count = MAX(ref_count, object->ref_count);
11765
11766 m = vm_page_lookup(object, offset);
11767
11768 if (m != VM_PAGE_NULL) {
11769 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
11770 break;
11771 } else {
11772 #if MACH_PAGEMAP
11773 if (object->existence_map) {
11774 if (vm_external_state_get(object->existence_map,
11775 offset) ==
11776 VM_EXTERNAL_STATE_EXISTS) {
11777 /*
11778 * this page has been paged out
11779 */
11780 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
11781 break;
11782 }
11783 } else
11784 #endif
11785 {
11786 if (object->internal &&
11787 object->alive &&
11788 !object->terminating &&
11789 object->pager_ready) {
11790
11791 memory_object_t pager;
11792
11793 vm_object_paging_begin(object);
11794 pager = object->pager;
11795 vm_object_unlock(object);
11796
11797 /*
11798 * Ask the default pager if
11799 * it has this page.
11800 */
11801 kr = memory_object_data_request(
11802 pager,
11803 offset + object->paging_offset,
11804 0, /* just poke the pager */
11805 VM_PROT_READ,
11806 NULL);
11807
11808 vm_object_lock(object);
11809 vm_object_paging_end(object);
11810
11811 if (kr == KERN_SUCCESS) {
11812 /* the default pager has it */
11813 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
11814 break;
11815 }
11816 }
11817 }
11818
11819 if (object->shadow != VM_OBJECT_NULL) {
11820 vm_object_t shadow;
11821
11822 offset += object->shadow_offset;
11823 shadow = object->shadow;
11824
11825 vm_object_lock(shadow);
11826 vm_object_unlock(object);
11827
11828 object = shadow;
11829 top_object = FALSE;
11830 depth++;
11831 } else {
11832 // if (!object->internal)
11833 // break;
11834 // retval = KERN_FAILURE;
11835 // goto done_with_object;
11836 break;
11837 }
11838 }
11839 }
11840 /* The ref_count is not strictly accurate: it measures the number */
11841 /* of entities holding a ref on the object, and they may not be */
11842 /* mapping the object or the section holding the target page. But */
11843 /* it's still a ballpark number and, though an over-count, it picks */
11844 /* up the copy-on-write cases. */
11845
11846 /* We could also get a picture of page sharing from pmap_attributes */
11847 /* but this would under count as only faulted-in mappings would */
11848 /* show up. */
11849
11850 if (top_object == TRUE && object->shadow)
11851 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
11852
11853 if (! object->internal)
11854 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
11855
11856 if (m == VM_PAGE_NULL)
11857 goto done_with_object;
11858
11859 if (m->fictitious) {
11860 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
11861 goto done_with_object;
11862 }
11863 if (m->dirty || pmap_is_modified(m->phys_page))
11864 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
11865
11866 if (m->reference || pmap_is_referenced(m->phys_page))
11867 disposition |= VM_PAGE_QUERY_PAGE_REF;
11868
11869 if (m->speculative)
11870 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
11871
11872 if (m->cs_validated)
11873 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
11874 if (m->cs_tainted)
11875 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
11876
11877 done_with_object:
11878 vm_object_unlock(object);
11879 done:
11880
11881 switch (flavor) {
11882 case VM_PAGE_INFO_BASIC:
11883 basic_info = (vm_page_info_basic_t) info;
11884 basic_info->disposition = disposition;
11885 basic_info->ref_count = ref_count;
11886 basic_info->object_id = (vm_object_id_t) (uintptr_t) object;
11887 basic_info->offset = (memory_object_offset_t) offset;
11888 basic_info->depth = depth;
11889 break;
11890 }
11891
11892 return retval;
11893 }
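
/*
 * Illustrative sketch (never compiled): querying a single page's
 * disposition through vm_map_page_query_internal() (the wrapper defined
 * above) and testing the VM_PAGE_QUERY_* bits assembled by
 * vm_map_page_info().
 */
#if 0 /* sketch only, never compiled here */
static boolean_t
example_page_is_dirty(
	vm_map_t	map,
	vm_map_offset_t	addr)
{
	int	disposition = 0;
	int	ref_count = 0;

	if (vm_map_page_query_internal(map, vm_map_trunc_page(addr),
				       &disposition,
				       &ref_count) != KERN_SUCCESS)
		return FALSE;

	if (!(disposition & VM_PAGE_QUERY_PAGE_PRESENT))
		return FALSE;

	return (disposition & VM_PAGE_QUERY_PAGE_DIRTY) ? TRUE : FALSE;
}
#endif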
11894
11895 /*
11896 * vm_map_msync
11897 *
11898 * Synchronises the memory range specified with its backing store
11899 * image by either flushing or cleaning the contents to the appropriate
11900 * memory manager, engaging in a memory object synchronize dialog with
11901 * the manager. The client doesn't return until the manager issues the
11902 * m_o_s_completed message. MIG magically converts the user task parameter
11903 * to the task's address map.
11904 *
11905 * interpretation of sync_flags
11906 * VM_SYNC_INVALIDATE - discard pages, only return precious
11907 * pages to manager.
11908 *
11909 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
11910 * - discard pages, write dirty or precious
11911 * pages back to memory manager.
11912 *
11913 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
11914 * - write dirty or precious pages back to
11915 * the memory manager.
11916 *
11917 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
11918 * is a hole in the region, and we would
11919 * have returned KERN_SUCCESS, return
11920 * KERN_INVALID_ADDRESS instead.
11921 *
11922 * NOTE
11923 * The memory object attributes have not yet been implemented, this
11924 * function will have to deal with the invalidate attribute
11925 *
11926 * RETURNS
11927 * KERN_INVALID_TASK Bad task parameter
11928 * KERN_INVALID_ARGUMENT both sync and async were specified.
11929 * KERN_SUCCESS The usual.
11930 * KERN_INVALID_ADDRESS There was a hole in the region.
11931 */
11932
11933 kern_return_t
11934 vm_map_msync(
11935 vm_map_t map,
11936 vm_map_address_t address,
11937 vm_map_size_t size,
11938 vm_sync_t sync_flags)
11939 {
11940 msync_req_t msr;
11941 msync_req_t new_msr;
11942 queue_chain_t req_q; /* queue of requests for this msync */
11943 vm_map_entry_t entry;
11944 vm_map_size_t amount_left;
11945 vm_object_offset_t offset;
11946 boolean_t do_sync_req;
11947 boolean_t had_hole = FALSE;
11948 memory_object_t pager;
11949
11950 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
11951 (sync_flags & VM_SYNC_SYNCHRONOUS))
11952 return(KERN_INVALID_ARGUMENT);
11953
11954 /*
11955 * align address and size on page boundaries
11956 */
11957 size = vm_map_round_page(address + size) - vm_map_trunc_page(address);
11958 address = vm_map_trunc_page(address);
11959
11960 if (map == VM_MAP_NULL)
11961 return(KERN_INVALID_TASK);
11962
11963 if (size == 0)
11964 return(KERN_SUCCESS);
11965
11966 queue_init(&req_q);
11967 amount_left = size;
11968
11969 while (amount_left > 0) {
11970 vm_object_size_t flush_size;
11971 vm_object_t object;
11972
11973 vm_map_lock(map);
11974 if (!vm_map_lookup_entry(map,
11975 vm_map_trunc_page(address), &entry)) {
11976
11977 vm_map_size_t skip;
11978
11979 /*
11980 * hole in the address map.
11981 */
11982 had_hole = TRUE;
11983
11984 /*
11985 * Check for empty map.
11986 */
11987 if (entry == vm_map_to_entry(map) &&
11988 entry->vme_next == entry) {
11989 vm_map_unlock(map);
11990 break;
11991 }
11992 /*
11993 * Check that we don't wrap and that
11994 * we have at least one real map entry.
11995 */
11996 if ((map->hdr.nentries == 0) ||
11997 (entry->vme_next->vme_start < address)) {
11998 vm_map_unlock(map);
11999 break;
12000 }
12001 /*
12002 * Move up to the next entry if needed
12003 */
12004 skip = (entry->vme_next->vme_start - address);
12005 if (skip >= amount_left)
12006 amount_left = 0;
12007 else
12008 amount_left -= skip;
12009 address = entry->vme_next->vme_start;
12010 vm_map_unlock(map);
12011 continue;
12012 }
12013
12014 offset = address - entry->vme_start;
12015
12016 /*
12017 * do we have more to flush than is contained in this
12018 * entry ?
12019 */
12020 if (amount_left + entry->vme_start + offset > entry->vme_end) {
12021 flush_size = entry->vme_end -
12022 (entry->vme_start + offset);
12023 } else {
12024 flush_size = amount_left;
12025 }
12026 amount_left -= flush_size;
12027 address += flush_size;
12028
12029 if (entry->is_sub_map == TRUE) {
12030 vm_map_t local_map;
12031 vm_map_offset_t local_offset;
12032
12033 local_map = entry->object.sub_map;
12034 local_offset = entry->offset;
12035 vm_map_unlock(map);
12036 if (vm_map_msync(
12037 local_map,
12038 local_offset,
12039 flush_size,
12040 sync_flags) == KERN_INVALID_ADDRESS) {
12041 had_hole = TRUE;
12042 }
12043 continue;
12044 }
12045 object = entry->object.vm_object;
12046
12047 /*
12048 * We can't sync this object if the object has not been
12049 * created yet
12050 */
12051 if (object == VM_OBJECT_NULL) {
12052 vm_map_unlock(map);
12053 continue;
12054 }
12055 offset += entry->offset;
12056
12057 vm_object_lock(object);
12058
12059 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
12060 int kill_pages = 0;
12061 boolean_t reusable_pages = FALSE;
12062
12063 if (sync_flags & VM_SYNC_KILLPAGES) {
12064 if (object->ref_count == 1 && !object->shadow)
12065 kill_pages = 1;
12066 else
12067 kill_pages = -1;
12068 }
12069 if (kill_pages != -1)
12070 vm_object_deactivate_pages(object, offset,
12071 (vm_object_size_t)flush_size, kill_pages, reusable_pages);
12072 vm_object_unlock(object);
12073 vm_map_unlock(map);
12074 continue;
12075 }
12076 /*
12077 * We can't sync this object if there isn't a pager.
12078 * Don't bother to sync internal objects, since there can't
12079 * be any "permanent" storage for these objects anyway.
12080 */
12081 if ((object->pager == MEMORY_OBJECT_NULL) ||
12082 (object->internal) || (object->private)) {
12083 vm_object_unlock(object);
12084 vm_map_unlock(map);
12085 continue;
12086 }
12087 /*
12088 * keep reference on the object until syncing is done
12089 */
12090 vm_object_reference_locked(object);
12091 vm_object_unlock(object);
12092
12093 vm_map_unlock(map);
12094
12095 do_sync_req = vm_object_sync(object,
12096 offset,
12097 flush_size,
12098 sync_flags & VM_SYNC_INVALIDATE,
12099 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
12100 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
12101 sync_flags & VM_SYNC_SYNCHRONOUS);
12102 /*
12103 * only send an m_o_s if we returned pages or if the entry
12104 * is writable (i.e. dirty pages may have already been sent back)
12105 */
12106 if (!do_sync_req) {
12107 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
12108 /*
12109 * clear out the clustering and read-ahead hints
12110 */
12111 vm_object_lock(object);
12112
12113 object->pages_created = 0;
12114 object->pages_used = 0;
12115 object->sequential = 0;
12116 object->last_alloc = 0;
12117
12118 vm_object_unlock(object);
12119 }
12120 vm_object_deallocate(object);
12121 continue;
12122 }
12123 msync_req_alloc(new_msr);
12124
12125 vm_object_lock(object);
12126 offset += object->paging_offset;
12127
12128 new_msr->offset = offset;
12129 new_msr->length = flush_size;
12130 new_msr->object = object;
12131 new_msr->flag = VM_MSYNC_SYNCHRONIZING;
12132 re_iterate:
12133
12134 /*
12135 * We can't sync this object if there isn't a pager. The
12136 * pager can disappear anytime we're not holding the object
12137 * lock. So this has to be checked anytime we goto re_iterate.
12138 */
12139
12140 pager = object->pager;
12141
12142 if (pager == MEMORY_OBJECT_NULL) {
12143 vm_object_unlock(object);
12144 vm_object_deallocate(object);
12145 continue;
12146 }
12147
12148 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
12149 /*
12150 * need to check for overlapping entry, if found, wait
12151 * on overlapping msr to be done, then reiterate
12152 */
12153 msr_lock(msr);
12154 if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
12155 ((offset >= msr->offset &&
12156 offset < (msr->offset + msr->length)) ||
12157 (msr->offset >= offset &&
12158 msr->offset < (offset + flush_size))))
12159 {
12160 assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
12161 msr_unlock(msr);
12162 vm_object_unlock(object);
12163 thread_block(THREAD_CONTINUE_NULL);
12164 vm_object_lock(object);
12165 goto re_iterate;
12166 }
12167 msr_unlock(msr);
12168 }/* queue_iterate */
12169
12170 queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
12171
12172 vm_object_paging_begin(object);
12173 vm_object_unlock(object);
12174
12175 queue_enter(&req_q, new_msr, msync_req_t, req_q);
12176
12177 (void) memory_object_synchronize(
12178 pager,
12179 offset,
12180 flush_size,
12181 sync_flags & ~VM_SYNC_CONTIGUOUS);
12182
12183 vm_object_lock(object);
12184 vm_object_paging_end(object);
12185 vm_object_unlock(object);
12186 }/* while */
12187
12188 /*
12189 * wait for memory_object_synchronize_completed messages from pager(s)
12190 */
12191
12192 while (!queue_empty(&req_q)) {
12193 msr = (msync_req_t)queue_first(&req_q);
12194 msr_lock(msr);
12195 while(msr->flag != VM_MSYNC_DONE) {
12196 assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
12197 msr_unlock(msr);
12198 thread_block(THREAD_CONTINUE_NULL);
12199 msr_lock(msr);
12200 }/* while */
12201 queue_remove(&req_q, msr, msync_req_t, req_q);
12202 msr_unlock(msr);
12203 vm_object_deallocate(msr->object);
12204 msync_req_free(msr);
12205 }/* queue_iterate */
12206
12207 /* for proper msync() behaviour */
12208 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
12209 return(KERN_INVALID_ADDRESS);
12210
12211 return(KERN_SUCCESS);
12212 }/* vm_msync */
12213
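/*
 * Illustrative sketch (not part of the original file): how a caller
 * might drive the routine above.  The entry-point name vm_map_msync()
 * and its exact signature are assumptions here; only the sync_flags
 * values and the VM_SYNC_CONTIGUOUS hole behaviour come from the code
 * above.
 */
#if 0	/* sketch only -- not compiled */
static kern_return_t
example_flush_range(
	vm_map_t	map,
	vm_map_offset_t	addr,
	vm_map_size_t	len)
{
	kern_return_t	kr;

	/* synchronous flush, and require the range to be fully mapped */
	kr = vm_map_msync(map, addr, len,
			  VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS);
	/*
	 * KERN_INVALID_ADDRESS means a hole was found in the range
	 * (the "had_hole" case above); msync(2) typically reports
	 * that as ENOMEM.
	 */
	return kr;
}
#endif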
12214 /*
12215 * Routine: convert_port_entry_to_map
12216 * Purpose:
12217 * Convert from a port specifying an entry or a task
12218 * to a map. Doesn't consume the port ref; produces a map ref,
12219 * which may be null. Unlike convert_port_to_map, the
12220 * port may be either task backed or named-entry backed.
12221 * Conditions:
12222 * Nothing locked.
12223 */
12224
12225
12226 vm_map_t
12227 convert_port_entry_to_map(
12228 ipc_port_t port)
12229 {
12230 vm_map_t map;
12231 vm_named_entry_t named_entry;
12232 uint32_t try_failed_count = 0;
12233
12234 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
12235 while(TRUE) {
12236 ip_lock(port);
12237 if(ip_active(port) && (ip_kotype(port)
12238 == IKOT_NAMED_ENTRY)) {
12239 named_entry =
12240 (vm_named_entry_t)port->ip_kobject;
12241 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
12242 ip_unlock(port);
12243
12244 try_failed_count++;
12245 mutex_pause(try_failed_count);
12246 continue;
12247 }
12248 named_entry->ref_count++;
12249 lck_mtx_unlock(&(named_entry)->Lock);
12250 ip_unlock(port);
12251 if ((named_entry->is_sub_map) &&
12252 (named_entry->protection
12253 & VM_PROT_WRITE)) {
12254 map = named_entry->backing.map;
12255 } else {
12256 mach_destroy_memory_entry(port);
12257 return VM_MAP_NULL;
12258 }
12259 vm_map_reference_swap(map);
12260 mach_destroy_memory_entry(port);
12261 break;
12262 }
12263 else
12264 return VM_MAP_NULL;
12265 }
12266 }
12267 else
12268 map = convert_port_to_map(port);
12269
12270 return map;
12271 }
12272
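/*
 * Minimal caller sketch (hypothetical, not from this file): the routine
 * above hands back a map reference that may be VM_MAP_NULL, so a caller
 * checks the result and drops the reference with vm_map_deallocate()
 * when it is done.
 */
#if 0	/* sketch only -- not compiled */
static kern_return_t
example_with_port_map(ipc_port_t port)
{
	vm_map_t	map;

	map = convert_port_entry_to_map(port);	/* takes a map ref */
	if (map == VM_MAP_NULL)
		return KERN_INVALID_ARGUMENT;

	/* ... operate on the map ... */

	vm_map_deallocate(map);		/* drop the ref taken above */
	return KERN_SUCCESS;
}
#endif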
12273 /*
12274 * Routine: convert_port_entry_to_object
12275 * Purpose:
12276 * Convert from a port specifying a named entry to an
12277 * object. Doesn't consume the port ref; produces an object ref,
12278 * which may be null.
12279 * Conditions:
12280 * Nothing locked.
12281 */
12282
12283
12284 vm_object_t
12285 convert_port_entry_to_object(
12286 ipc_port_t port)
12287 {
12288 vm_object_t object;
12289 vm_named_entry_t named_entry;
12290 uint32_t try_failed_count = 0;
12291
12292 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
12293 while(TRUE) {
12294 ip_lock(port);
12295 if(ip_active(port) && (ip_kotype(port)
12296 == IKOT_NAMED_ENTRY)) {
12297 named_entry =
12298 (vm_named_entry_t)port->ip_kobject;
12299 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
12300 ip_unlock(port);
12301
12302 try_failed_count++;
12303 mutex_pause(try_failed_count);
12304 continue;
12305 }
12306 named_entry->ref_count++;
12307 lck_mtx_unlock(&(named_entry)->Lock);
12308 ip_unlock(port);
12309 if ((!named_entry->is_sub_map) &&
12310 (!named_entry->is_pager) &&
12311 (named_entry->protection
12312 & VM_PROT_WRITE)) {
12313 object = named_entry->backing.object;
12314 } else {
12315 mach_destroy_memory_entry(port);
12316 return (vm_object_t)NULL;
12317 }
12318 vm_object_reference(named_entry->backing.object);
12319 mach_destroy_memory_entry(port);
12320 break;
12321 }
12322 else
12323 return (vm_object_t)NULL;
12324 }
12325 } else {
12326 return (vm_object_t)NULL;
12327 }
12328
12329 return object;
12330 }
12331
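/*
 * Companion sketch for the object case (hypothetical caller): the
 * reference returned here was taken on named_entry->backing.object
 * above, so the caller releases it with vm_object_deallocate().
 */
#if 0	/* sketch only -- not compiled */
static void
example_with_port_object(ipc_port_t port)
{
	vm_object_t	object;

	object = convert_port_entry_to_object(port);	/* takes an object ref */
	if (object == VM_OBJECT_NULL)
		return;

	/* ... operate on the object ... */

	vm_object_deallocate(object);	/* drop the ref taken above */
}
#endif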
12332 /*
12333 * Export routines to other components for the things we access locally through
12334 * macros.
12335 */
12336 #undef current_map
12337 vm_map_t
12338 current_map(void)
12339 {
12340 return (current_map_fast());
12341 }
12342
12343 /*
12344 * vm_map_reference:
12345 *
12346 * Most code internal to osfmk will go through a
12347 * macro defining this. This is always here for the
12348 * use of other kernel components.
12349 */
12350 #undef vm_map_reference
12351 void
12352 vm_map_reference(
12353 register vm_map_t map)
12354 {
12355 if (map == VM_MAP_NULL)
12356 return;
12357
12358 lck_mtx_lock(&map->s_lock);
12359 #if TASK_SWAPPER
12360 assert(map->res_count > 0);
12361 assert(map->ref_count >= map->res_count);
12362 map->res_count++;
12363 #endif
12364 map->ref_count++;
12365 lck_mtx_unlock(&map->s_lock);
12366 }
12367
12368 /*
12369 * vm_map_deallocate:
12370 *
12371 * Removes a reference from the specified map,
12372 * destroying it if no references remain.
12373 * The map should not be locked.
12374 */
12375 void
12376 vm_map_deallocate(
12377 register vm_map_t map)
12378 {
12379 unsigned int ref;
12380
12381 if (map == VM_MAP_NULL)
12382 return;
12383
12384 lck_mtx_lock(&map->s_lock);
12385 ref = --map->ref_count;
12386 if (ref > 0) {
12387 vm_map_res_deallocate(map);
12388 lck_mtx_unlock(&map->s_lock);
12389 return;
12390 }
12391 assert(map->ref_count == 0);
12392 lck_mtx_unlock(&map->s_lock);
12393
12394 #if TASK_SWAPPER
12395 /*
12396 * The map residence count isn't decremented here because
12397 * the vm_map_destroy() below (via vm_map_delete()) will traverse
12398 * the entire map, deleting entries, and the residence counts on objects
12399 * and sharing maps will go away then.
12400 */
12401 #endif
12402
12403 vm_map_destroy(map, VM_MAP_NO_FLAGS);
12404 }
12405
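/*
 * Sketch of the intended pairing for other kernel components (the
 * helper name is made up for illustration): every vm_map_reference()
 * is balanced by a vm_map_deallocate(); the last deallocate tears the
 * map down via vm_map_destroy().
 */
#if 0	/* sketch only -- not compiled */
static void
example_keep_map_alive(vm_map_t map)
{
	vm_map_reference(map);		/* take an extra ref before stashing the map */

	/* ... hand the map to a longer-lived consumer ... */

	vm_map_deallocate(map);		/* release it when the consumer is done */
}
#endif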
12406
12407 void
12408 vm_map_disable_NX(vm_map_t map)
12409 {
12410 if (map == NULL)
12411 return;
12412 if (map->pmap == NULL)
12413 return;
12414
12415 pmap_disable_NX(map->pmap);
12416 }
12417
12418 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
12419 * more descriptive.
12420 */
12421 void
12422 vm_map_set_32bit(vm_map_t map)
12423 {
12424 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
12425 }
12426
12427
12428 void
12429 vm_map_set_64bit(vm_map_t map)
12430 {
12431 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
12432 }
12433
12434 vm_map_offset_t
12435 vm_compute_max_offset(unsigned is64)
12436 {
12437 return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
12438 }
12439
12440 boolean_t
12441 vm_map_is_64bit(
12442 vm_map_t map)
12443 {
12444 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
12445 }
12446
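/*
 * Small sketch tying the sizing routines together (hypothetical
 * helper): a map's address-space limit can be chosen from a 64-bit
 * flag through the setters above, or computed directly with
 * vm_compute_max_offset().
 */
#if 0	/* sketch only -- not compiled */
static void
example_size_map(vm_map_t map, boolean_t is64)
{
	if (is64)
		vm_map_set_64bit(map);	/* max_offset = MACH_VM_MAX_ADDRESS */
	else
		vm_map_set_32bit(map);	/* max_offset = VM_MAX_ADDRESS */

	/* the same limit can be computed without touching the map */
	assert(map->max_offset == vm_compute_max_offset((unsigned)is64));
	assert(vm_map_is_64bit(map) == is64);
}
#endif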
12447 boolean_t
12448 vm_map_has_4GB_pagezero(
12449 vm_map_t map)
12450 {
12451 /*
12452 * XXX FBDP
12453 * We should lock the VM map (for read) here but we can get away
12454 * with it for now because there can't really be any race condition:
12455 * the VM map's min_offset is changed only when the VM map is created
12456 * and when the zero page is established (when the binary gets loaded),
12457 * and this routine gets called only when the task terminates and the
12458 * VM map is being torn down, and when a new map is created via
12459 * load_machfile()/execve().
12460 */
12461 return (map->min_offset >= 0x100000000ULL);
12462 }
12463
12464 void
12465 vm_map_set_4GB_pagezero(vm_map_t map)
12466 {
12467 #ifdef __i386__
12468 pmap_set_4GB_pagezero(map->pmap);
12469 #else
12470 #pragma unused(map)
12471 #endif
12472
12473 }
12474
12475 void
12476 vm_map_clear_4GB_pagezero(vm_map_t map)
12477 {
12478 #ifdef __i386__
12479 pmap_clear_4GB_pagezero(map->pmap);
12480 #else
12481 #pragma unused(map)
12482 #endif
12483 }
12484
12485 /*
12486 * Raise a VM map's minimum offset, to strictly enforce
12487 * the "page zero" reservation.
12488 */
12489 kern_return_t
12490 vm_map_raise_min_offset(
12491 vm_map_t map,
12492 vm_map_offset_t new_min_offset)
12493 {
12494 vm_map_entry_t first_entry;
12495
12496 new_min_offset = vm_map_round_page(new_min_offset);
12497
12498 vm_map_lock(map);
12499
12500 if (new_min_offset < map->min_offset) {
12501 /*
12502 * Can't move min_offset backwards, as that would expose
12503 * a part of the address space that was previously, and for
12504 * possibly good reasons, inaccessible.
12505 */
12506 vm_map_unlock(map);
12507 return KERN_INVALID_ADDRESS;
12508 }
12509
12510 first_entry = vm_map_first_entry(map);
12511 if (first_entry != vm_map_to_entry(map) &&
12512 first_entry->vme_start < new_min_offset) {
12513 /*
12514 * Some memory was already allocated below the new
12515 * minimum offset. It's too late to change it now...
12516 */
12517 vm_map_unlock(map);
12518 return KERN_NO_SPACE;
12519 }
12520
12521 map->min_offset = new_min_offset;
12522
12523 vm_map_unlock(map);
12524
12525 return KERN_SUCCESS;
12526 }
12527
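/*
 * Hypothetical caller sketch: a loader wanting a hard "page zero"
 * reservation would raise the minimum offset right after the map is
 * set up, before anything is mapped that low.  KERN_NO_SPACE means
 * memory is already allocated below the requested floor;
 * KERN_INVALID_ADDRESS means an attempt to lower min_offset.
 */
#if 0	/* sketch only -- not compiled */
static kern_return_t
example_reserve_pagezero(
	vm_map_t	map,
	vm_map_offset_t	pagezero_size)
{
	/* pagezero_size is rounded up to a page boundary internally */
	return vm_map_raise_min_offset(map, pagezero_size);
}
#endif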
12528 /*
12529 * Set the limit on the maximum amount of user wired memory allowed for this map.
12530 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
12531 * the kernel. The limits are checked on the Mach VM side, so we keep a copy here
12532 * to avoid reaching over to the BSD data structures.
12533 */
12534
12535 void
12536 vm_map_set_user_wire_limit(vm_map_t map,
12537 vm_size_t limit)
12538 {
12539 map->user_wire_limit = limit;
12540 }
12541
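/*
 * Hedged sketch of how the BSD side might push an updated
 * RLIMIT_MEMLOCK value down to the Mach VM layer.  The call site and
 * helper name are hypothetical; only vm_map_set_user_wire_limit() and
 * current_map() come from this file.
 */
#if 0	/* sketch only -- not compiled */
static void
example_propagate_memlock_limit(uint64_t rlim_cur)
{
	/* mirror the BSD MEMLOCK rlimit into the current task's map */
	vm_map_set_user_wire_limit(current_map(), (vm_size_t)rlim_cur);
}
#endif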
12542
12543 void vm_map_switch_protect(vm_map_t map,
12544 boolean_t val)
12545 {
12546 vm_map_lock(map);
12547 map->switch_protect = val;
12548 vm_map_unlock(map);
12549 }