2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
46 * Carnegie Mellon requests users of this software to return to
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
63 * Virtual memory mapping module.
66 #include <task_swapper.h>
67 #include <mach_assert.h>
68 #include <libkern/OSAtomic.h>
70 #include <mach/kern_return.h>
71 #include <mach/port.h>
72 #include <mach/vm_attributes.h>
73 #include <mach/vm_param.h>
74 #include <mach/vm_behavior.h>
75 #include <mach/vm_statistics.h>
76 #include <mach/memory_object.h>
77 #include <mach/mach_vm.h>
78 #include <machine/cpu_capabilities.h>
81 #include <kern/assert.h>
82 #include <kern/counters.h>
83 #include <kern/kalloc.h>
84 #include <kern/zalloc.h>
87 #include <vm/vm_init.h>
88 #include <vm/vm_fault.h>
89 #include <vm/vm_map.h>
90 #include <vm/vm_object.h>
91 #include <vm/vm_page.h>
92 #include <vm/vm_pageout.h>
93 #include <vm/vm_kern.h>
94 #include <ipc/ipc_port.h>
95 #include <kern/sched_prim.h>
96 #include <kern/misc_protos.h>
97 #include <machine/db_machdep.h>
100 #include <mach/vm_map_server.h>
101 #include <mach/mach_host_server.h>
102 #include <vm/vm_protos.h>
103 #include <vm/vm_purgeable_internal.h>
106 #include <ppc/mappings.h>
109 #include <vm/vm_protos.h>
110 #include <vm/vm_shared_region.h>
112 /* Internal prototypes
115 static void vm_map_simplify_range(
117 vm_map_offset_t start
,
118 vm_map_offset_t end
); /* forward */
120 static boolean_t
vm_map_range_check(
122 vm_map_offset_t start
,
124 vm_map_entry_t
*entry
);
126 static vm_map_entry_t
_vm_map_entry_create(
127 struct vm_map_header
*map_header
);
129 static void _vm_map_entry_dispose(
130 struct vm_map_header
*map_header
,
131 vm_map_entry_t entry
);
133 static void vm_map_pmap_enter(
135 vm_map_offset_t addr
,
136 vm_map_offset_t end_addr
,
138 vm_object_offset_t offset
,
139 vm_prot_t protection
);
141 static void _vm_map_clip_end(
142 struct vm_map_header
*map_header
,
143 vm_map_entry_t entry
,
144 vm_map_offset_t end
);
146 static void _vm_map_clip_start(
147 struct vm_map_header
*map_header
,
148 vm_map_entry_t entry
,
149 vm_map_offset_t start
);
151 static void vm_map_entry_delete(
153 vm_map_entry_t entry
);
155 static kern_return_t
vm_map_delete(
157 vm_map_offset_t start
,
162 static kern_return_t
vm_map_copy_overwrite_unaligned(
164 vm_map_entry_t entry
,
166 vm_map_address_t start
);
168 static kern_return_t
vm_map_copy_overwrite_aligned(
170 vm_map_entry_t tmp_entry
,
172 vm_map_offset_t start
,
175 static kern_return_t
vm_map_copyin_kernel_buffer(
177 vm_map_address_t src_addr
,
179 boolean_t src_destroy
,
180 vm_map_copy_t
*copy_result
); /* OUT */
182 static kern_return_t
vm_map_copyout_kernel_buffer(
184 vm_map_address_t
*addr
, /* IN/OUT */
186 boolean_t overwrite
);
188 static void vm_map_fork_share(
190 vm_map_entry_t old_entry
,
193 static boolean_t
vm_map_fork_copy(
195 vm_map_entry_t
*old_entry_p
,
198 void vm_map_region_top_walk(
199 vm_map_entry_t entry
,
200 vm_region_top_info_t top
);
202 void vm_map_region_walk(
205 vm_map_entry_t entry
,
206 vm_object_offset_t offset
,
207 vm_object_size_t range
,
208 vm_region_extended_info_t extended
,
209 boolean_t look_for_pages
);
211 static kern_return_t
vm_map_wire_nested(
213 vm_map_offset_t start
,
215 vm_prot_t access_type
,
218 vm_map_offset_t pmap_addr
);
220 static kern_return_t
vm_map_unwire_nested(
222 vm_map_offset_t start
,
226 vm_map_offset_t pmap_addr
);
228 static kern_return_t
vm_map_overwrite_submap_recurse(
230 vm_map_offset_t dst_addr
,
231 vm_map_size_t dst_size
);
233 static kern_return_t
vm_map_copy_overwrite_nested(
235 vm_map_offset_t dst_addr
,
237 boolean_t interruptible
,
240 static kern_return_t
vm_map_remap_extract(
242 vm_map_offset_t addr
,
245 struct vm_map_header
*map_header
,
246 vm_prot_t
*cur_protection
,
247 vm_prot_t
*max_protection
,
248 vm_inherit_t inheritance
,
251 static kern_return_t
vm_map_remap_range_allocate(
253 vm_map_address_t
*address
,
255 vm_map_offset_t mask
,
257 vm_map_entry_t
*map_entry
);
259 static void vm_map_region_look_for_page(
263 vm_object_offset_t offset
,
266 vm_region_extended_info_t extended
);
268 static int vm_map_region_count_obj_refs(
269 vm_map_entry_t entry
,
273 static kern_return_t
vm_map_willneed(
275 vm_map_offset_t start
,
276 vm_map_offset_t end
);
278 static kern_return_t
vm_map_reuse_pages(
280 vm_map_offset_t start
,
281 vm_map_offset_t end
);
283 static kern_return_t
vm_map_reusable_pages(
285 vm_map_offset_t start
,
286 vm_map_offset_t end
);
288 static kern_return_t
vm_map_can_reuse(
290 vm_map_offset_t start
,
291 vm_map_offset_t end
);
294 * Macros to copy a vm_map_entry. We must be careful to correctly
295 * manage the wired page count. vm_map_entry_copy() creates a new
296 * map entry to the same memory - the wired count in the new entry
297 * must be set to zero. vm_map_entry_copy_full() creates a new
298 * entry that is identical to the old entry. This preserves the
299 * wire count; it's used for map splitting and zone changing in
302 #define vm_map_entry_copy(NEW,OLD) \
305 (NEW)->is_shared = FALSE; \
306 (NEW)->needs_wakeup = FALSE; \
307 (NEW)->in_transition = FALSE; \
308 (NEW)->wired_count = 0; \
309 (NEW)->user_wired_count = 0; \
310 (NEW)->permanent = FALSE; \
313 #define vm_map_entry_copy_full(NEW,OLD) (*(NEW) = *(OLD))
316 * Decide if we want to allow processes to execute from their data or stack areas.
317 * override_nx() returns true if we do. Data/stack execution can be enabled independently
318 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
319 * or allow_stack_exec to enable data execution for that type of data area for that particular
320 * ABI (or both by or'ing the flags together). These are initialized in the architecture
321 * specific pmap files since the default behavior varies according to architecture. The
322 * main reason it varies is because of the need to provide binary compatibility with old
323 * applications that were written before these restrictions came into being. In the old
324 * days, an app could execute anything it could read, but this has slowly been tightened
325 * up over time. The default behavior is:
327 * 32-bit PPC apps may execute from both stack and data areas
328 * 32-bit Intel apps may exeucte from data areas but not stack
329 * 64-bit PPC/Intel apps may not execute from either data or stack
331 * An application on any architecture may override these defaults by explicitly
332 * adding PROT_EXEC permission to the page in question with the mprotect(2)
333 * system call. This code here just determines what happens when an app tries to
334 * execute from a page that lacks execute permission.
336 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
337 * default behavior for both 32 and 64 bit apps on a system-wide basis.
340 extern int allow_data_exec
, allow_stack_exec
;
343 override_nx(vm_map_t map
, uint32_t user_tag
) /* map unused on arm */
348 * Determine if the app is running in 32 or 64 bit mode.
351 if (vm_map_is_64bit(map
))
352 current_abi
= VM_ABI_64
;
354 current_abi
= VM_ABI_32
;
357 * Determine if we should allow the execution based on whether it's a
358 * stack or data area and the current architecture.
361 if (user_tag
== VM_MEMORY_STACK
)
362 return allow_stack_exec
& current_abi
;
364 return allow_data_exec
& current_abi
;
369 * Virtual memory maps provide for the mapping, protection,
370 * and sharing of virtual memory objects. In addition,
371 * this module provides for an efficient virtual copy of
372 * memory from one map to another.
374 * Synchronization is required prior to most operations.
376 * Maps consist of an ordered doubly-linked list of simple
377 * entries; a single hint is used to speed up lookups.
379 * Sharing maps have been deleted from this version of Mach.
380 * All shared objects are now mapped directly into the respective
381 * maps. This requires a change in the copy on write strategy;
382 * the asymmetric (delayed) strategy is used for shared temporary
383 * objects instead of the symmetric (shadow) strategy. All maps
384 * are now "top level" maps (either task map, kernel map or submap
385 * of the kernel map).
387 * Since portions of maps are specified by start/end addreses,
388 * which may not align with existing map entries, all
389 * routines merely "clip" entries to these start/end values.
390 * [That is, an entry is split into two, bordering at a
391 * start or end value.] Note that these clippings may not
392 * always be necessary (as the two resulting entries are then
393 * not changed); however, the clipping is done for convenience.
394 * No attempt is currently made to "glue back together" two
397 * The symmetric (shadow) copy strategy implements virtual copy
398 * by copying VM object references from one map to
399 * another, and then marking both regions as copy-on-write.
400 * It is important to note that only one writeable reference
401 * to a VM object region exists in any map when this strategy
402 * is used -- this means that shadow object creation can be
403 * delayed until a write operation occurs. The symmetric (delayed)
404 * strategy allows multiple maps to have writeable references to
405 * the same region of a vm object, and hence cannot delay creating
406 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
407 * Copying of permanent objects is completely different; see
408 * vm_object_copy_strategically() in vm_object.c.
411 static zone_t vm_map_zone
; /* zone for vm_map structures */
412 static zone_t vm_map_entry_zone
; /* zone for vm_map_entry structures */
413 static zone_t vm_map_kentry_zone
; /* zone for kernel entry structures */
414 static zone_t vm_map_copy_zone
; /* zone for vm_map_copy structures */
418 * Placeholder object for submap operations. This object is dropped
419 * into the range by a call to vm_map_find, and removed when
420 * vm_map_submap creates the submap.
423 vm_object_t vm_submap_object
;
425 static void *map_data
;
426 static vm_size_t map_data_size
;
427 static void *kentry_data
;
428 static vm_size_t kentry_data_size
;
429 static int kentry_count
= 2048; /* to init kentry_data_size */
431 #define NO_COALESCE_LIMIT ((1024 * 128) - 1)
434 /* Skip acquiring locks if we're in the midst of a kernel core dump */
435 unsigned int not_in_kdp
= 1;
437 #if CONFIG_CODE_DECRYPTION
439 * vm_map_apple_protected:
440 * This remaps the requested part of the object with an object backed by
441 * the decrypting pager.
442 * crypt_info contains entry points and session data for the crypt module.
443 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
444 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
447 vm_map_apple_protected(
449 vm_map_offset_t start
,
451 struct pager_crypt_info
*crypt_info
)
453 boolean_t map_locked
;
455 vm_map_entry_t map_entry
;
456 memory_object_t protected_mem_obj
;
457 vm_object_t protected_object
;
458 vm_map_offset_t map_addr
;
460 vm_map_lock_read(map
);
463 /* lookup the protected VM object */
464 if (!vm_map_lookup_entry(map
,
467 map_entry
->vme_end
< end
||
468 map_entry
->is_sub_map
) {
469 /* that memory is not properly mapped */
470 kr
= KERN_INVALID_ARGUMENT
;
473 protected_object
= map_entry
->object
.vm_object
;
474 if (protected_object
== VM_OBJECT_NULL
) {
475 /* there should be a VM object here at this point */
476 kr
= KERN_INVALID_ARGUMENT
;
480 /* make sure protected object stays alive while map is unlocked */
481 vm_object_reference(protected_object
);
483 vm_map_unlock_read(map
);
487 * Lookup (and create if necessary) the protected memory object
488 * matching that VM object.
489 * If successful, this also grabs a reference on the memory object,
490 * to guarantee that it doesn't go away before we get a chance to map
493 protected_mem_obj
= apple_protect_pager_setup(protected_object
, crypt_info
);
495 /* release extra ref on protected object */
496 vm_object_deallocate(protected_object
);
498 if (protected_mem_obj
== NULL
) {
503 /* map this memory object in place of the current one */
505 kr
= vm_map_enter_mem_object(map
,
508 (mach_vm_offset_t
) 0,
509 VM_FLAGS_FIXED
| VM_FLAGS_OVERWRITE
,
510 (ipc_port_t
) protected_mem_obj
,
512 (start
- map_entry
->vme_start
)),
514 map_entry
->protection
,
515 map_entry
->max_protection
,
516 map_entry
->inheritance
);
517 assert(map_addr
== start
);
519 * Release the reference obtained by apple_protect_pager_setup().
520 * The mapping (if it succeeded) is now holding a reference on the
523 memory_object_deallocate(protected_mem_obj
);
527 vm_map_unlock_read(map
);
531 #endif /* CONFIG_CODE_DECRYPTION */
534 lck_grp_t vm_map_lck_grp
;
535 lck_grp_attr_t vm_map_lck_grp_attr
;
536 lck_attr_t vm_map_lck_attr
;
542 * Initialize the vm_map module. Must be called before
543 * any other vm_map routines.
545 * Map and entry structures are allocated from zones -- we must
546 * initialize those zones.
548 * There are three zones of interest:
550 * vm_map_zone: used to allocate maps.
551 * vm_map_entry_zone: used to allocate map entries.
552 * vm_map_kentry_zone: used to allocate map entries for the kernel.
554 * The kernel allocates map entries from a special zone that is initially
555 * "crammed" with memory. It would be difficult (perhaps impossible) for
556 * the kernel to allocate more memory to a entry zone when it became
557 * empty since the very act of allocating memory implies the creation
564 vm_map_zone
= zinit((vm_map_size_t
) sizeof(struct _vm_map
), 40*1024,
566 zone_change(vm_map_zone
, Z_NOENCRYPT
, TRUE
);
569 vm_map_entry_zone
= zinit((vm_map_size_t
) sizeof(struct vm_map_entry
),
570 1024*1024, PAGE_SIZE
*5,
571 "non-kernel map entries");
572 zone_change(vm_map_entry_zone
, Z_NOENCRYPT
, TRUE
);
574 vm_map_kentry_zone
= zinit((vm_map_size_t
) sizeof(struct vm_map_entry
),
575 kentry_data_size
, kentry_data_size
,
576 "kernel map entries");
577 zone_change(vm_map_kentry_zone
, Z_NOENCRYPT
, TRUE
);
579 vm_map_copy_zone
= zinit((vm_map_size_t
) sizeof(struct vm_map_copy
),
580 16*1024, PAGE_SIZE
, "map copies");
581 zone_change(vm_map_copy_zone
, Z_NOENCRYPT
, TRUE
);
584 * Cram the map and kentry zones with initial data.
585 * Set kentry_zone non-collectible to aid zone_gc().
587 zone_change(vm_map_zone
, Z_COLLECT
, FALSE
);
588 zone_change(vm_map_kentry_zone
, Z_COLLECT
, FALSE
);
589 zone_change(vm_map_kentry_zone
, Z_EXPAND
, FALSE
);
590 zone_change(vm_map_kentry_zone
, Z_FOREIGN
, TRUE
);
591 zcram(vm_map_zone
, map_data
, map_data_size
);
592 zcram(vm_map_kentry_zone
, kentry_data
, kentry_data_size
);
594 lck_grp_attr_setdefault(&vm_map_lck_grp_attr
);
595 lck_grp_init(&vm_map_lck_grp
, "vm_map", &vm_map_lck_grp_attr
);
596 lck_attr_setdefault(&vm_map_lck_attr
);
603 map_data_size
= round_page(10 * sizeof(struct _vm_map
));
604 map_data
= pmap_steal_memory(map_data_size
);
608 * Limiting worst case: vm_map_kentry_zone needs to map each "available"
609 * physical page (i.e. that beyond the kernel image and page tables)
610 * individually; we guess at most one entry per eight pages in the
611 * real world. This works out to roughly .1 of 1% of physical memory,
612 * or roughly 1900 entries (64K) for a 64M machine with 4K pages.
615 kentry_count
= pmap_free_pages() / 8;
619 round_page(kentry_count
* sizeof(struct vm_map_entry
));
620 kentry_data
= pmap_steal_memory(kentry_data_size
);
626 * Creates and returns a new empty VM map with
627 * the given physical map structure, and having
628 * the given lower and upper address bounds.
637 static int color_seed
= 0;
638 register vm_map_t result
;
640 result
= (vm_map_t
) zalloc(vm_map_zone
);
641 if (result
== VM_MAP_NULL
)
642 panic("vm_map_create");
644 vm_map_first_entry(result
) = vm_map_to_entry(result
);
645 vm_map_last_entry(result
) = vm_map_to_entry(result
);
646 result
->hdr
.nentries
= 0;
647 result
->hdr
.entries_pageable
= pageable
;
650 result
->user_wire_limit
= MACH_VM_MAX_ADDRESS
; /* default limit is unlimited */
651 result
->user_wire_size
= 0;
652 result
->ref_count
= 1;
654 result
->res_count
= 1;
655 result
->sw_state
= MAP_SW_IN
;
656 #endif /* TASK_SWAPPER */
658 result
->min_offset
= min
;
659 result
->max_offset
= max
;
660 result
->wiring_required
= FALSE
;
661 result
->no_zero_fill
= FALSE
;
662 result
->mapped
= FALSE
;
663 result
->wait_for_space
= FALSE
;
664 result
->switch_protect
= FALSE
;
665 result
->first_free
= vm_map_to_entry(result
);
666 result
->hint
= vm_map_to_entry(result
);
667 result
->color_rr
= (color_seed
++) & vm_color_mask
;
668 vm_map_lock_init(result
);
669 lck_mtx_init_ext(&result
->s_lock
, &result
->s_lock_ext
, &vm_map_lck_grp
, &vm_map_lck_attr
);
675 * vm_map_entry_create: [ internal use only ]
677 * Allocates a VM map entry for insertion in the
678 * given map (or map copy). No fields are filled.
680 #define vm_map_entry_create(map) \
681 _vm_map_entry_create(&(map)->hdr)
683 #define vm_map_copy_entry_create(copy) \
684 _vm_map_entry_create(&(copy)->cpy_hdr)
686 static vm_map_entry_t
687 _vm_map_entry_create(
688 register struct vm_map_header
*map_header
)
690 register zone_t zone
;
691 register vm_map_entry_t entry
;
693 if (map_header
->entries_pageable
)
694 zone
= vm_map_entry_zone
;
696 zone
= vm_map_kentry_zone
;
698 entry
= (vm_map_entry_t
) zalloc(zone
);
699 if (entry
== VM_MAP_ENTRY_NULL
)
700 panic("vm_map_entry_create");
706 * vm_map_entry_dispose: [ internal use only ]
708 * Inverse of vm_map_entry_create.
710 * write map lock held so no need to
711 * do anything special to insure correctness
714 #define vm_map_entry_dispose(map, entry) \
716 if((entry) == (map)->first_free) \
717 (map)->first_free = vm_map_to_entry(map); \
718 if((entry) == (map)->hint) \
719 (map)->hint = vm_map_to_entry(map); \
720 _vm_map_entry_dispose(&(map)->hdr, (entry)); \
723 #define vm_map_copy_entry_dispose(map, entry) \
724 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
727 _vm_map_entry_dispose(
728 register struct vm_map_header
*map_header
,
729 register vm_map_entry_t entry
)
731 register zone_t zone
;
733 if (map_header
->entries_pageable
)
734 zone
= vm_map_entry_zone
;
736 zone
= vm_map_kentry_zone
;
742 static boolean_t
first_free_is_valid(vm_map_t map
); /* forward */
743 static boolean_t first_free_check
= FALSE
;
748 vm_map_entry_t entry
, next
;
750 if (!first_free_check
)
753 entry
= vm_map_to_entry(map
);
754 next
= entry
->vme_next
;
755 while (vm_map_trunc_page(next
->vme_start
) == vm_map_trunc_page(entry
->vme_end
) ||
756 (vm_map_trunc_page(next
->vme_start
) == vm_map_trunc_page(entry
->vme_start
) &&
757 next
!= vm_map_to_entry(map
))) {
759 next
= entry
->vme_next
;
760 if (entry
== vm_map_to_entry(map
))
763 if (map
->first_free
!= entry
) {
764 printf("Bad first_free for map %p: %p should be %p\n",
765 map
, map
->first_free
, entry
);
770 #endif /* MACH_ASSERT */
775 * Updates the map->first_free pointer to the
776 * entry immediately before the first hole in the map.
777 * The map should be locked.
779 #define UPDATE_FIRST_FREE(map, new_first_free) \
782 vm_map_entry_t UFF_first_free; \
783 vm_map_entry_t UFF_next_entry; \
785 UFF_first_free = (new_first_free); \
786 UFF_next_entry = UFF_first_free->vme_next; \
787 while (vm_map_trunc_page(UFF_next_entry->vme_start) == \
788 vm_map_trunc_page(UFF_first_free->vme_end) || \
789 (vm_map_trunc_page(UFF_next_entry->vme_start) == \
790 vm_map_trunc_page(UFF_first_free->vme_start) && \
791 UFF_next_entry != vm_map_to_entry(UFF_map))) { \
792 UFF_first_free = UFF_next_entry; \
793 UFF_next_entry = UFF_first_free->vme_next; \
794 if (UFF_first_free == vm_map_to_entry(UFF_map)) \
797 UFF_map->first_free = UFF_first_free; \
798 assert(first_free_is_valid(UFF_map)); \
802 * vm_map_entry_{un,}link:
804 * Insert/remove entries from maps (or map copies).
806 #define vm_map_entry_link(map, after_where, entry) \
809 vm_map_entry_t VMEL_entry; \
811 VMEL_entry = (entry); \
812 _vm_map_entry_link(&VMEL_map->hdr, after_where, VMEL_entry); \
813 UPDATE_FIRST_FREE(VMEL_map, VMEL_map->first_free); \
817 #define vm_map_copy_entry_link(copy, after_where, entry) \
818 _vm_map_entry_link(&(copy)->cpy_hdr, after_where, (entry))
820 #define _vm_map_entry_link(hdr, after_where, entry) \
823 (entry)->vme_prev = (after_where); \
824 (entry)->vme_next = (after_where)->vme_next; \
825 (entry)->vme_prev->vme_next = (entry)->vme_next->vme_prev = (entry); \
828 #define vm_map_entry_unlink(map, entry) \
831 vm_map_entry_t VMEU_entry; \
832 vm_map_entry_t VMEU_first_free; \
834 VMEU_entry = (entry); \
835 if (VMEU_entry->vme_start <= VMEU_map->first_free->vme_start) \
836 VMEU_first_free = VMEU_entry->vme_prev; \
838 VMEU_first_free = VMEU_map->first_free; \
839 _vm_map_entry_unlink(&VMEU_map->hdr, VMEU_entry); \
840 UPDATE_FIRST_FREE(VMEU_map, VMEU_first_free); \
843 #define vm_map_copy_entry_unlink(copy, entry) \
844 _vm_map_entry_unlink(&(copy)->cpy_hdr, (entry))
846 #define _vm_map_entry_unlink(hdr, entry) \
849 (entry)->vme_next->vme_prev = (entry)->vme_prev; \
850 (entry)->vme_prev->vme_next = (entry)->vme_next; \
853 #if MACH_ASSERT && TASK_SWAPPER
855 * vm_map_res_reference:
857 * Adds another valid residence count to the given map.
859 * Map is locked so this function can be called from
863 void vm_map_res_reference(register vm_map_t map
)
865 /* assert map is locked */
866 assert(map
->res_count
>= 0);
867 assert(map
->ref_count
>= map
->res_count
);
868 if (map
->res_count
== 0) {
869 lck_mtx_unlock(&map
->s_lock
);
872 lck_mtx_lock(&map
->s_lock
);
880 * vm_map_reference_swap:
882 * Adds valid reference and residence counts to the given map.
884 * The map may not be in memory (i.e. zero residence count).
887 void vm_map_reference_swap(register vm_map_t map
)
889 assert(map
!= VM_MAP_NULL
);
890 lck_mtx_lock(&map
->s_lock
);
891 assert(map
->res_count
>= 0);
892 assert(map
->ref_count
>= map
->res_count
);
894 vm_map_res_reference(map
);
895 lck_mtx_unlock(&map
->s_lock
);
899 * vm_map_res_deallocate:
901 * Decrement residence count on a map; possibly causing swapout.
903 * The map must be in memory (i.e. non-zero residence count).
905 * The map is locked, so this function is callable from vm_map_deallocate.
908 void vm_map_res_deallocate(register vm_map_t map
)
910 assert(map
->res_count
> 0);
911 if (--map
->res_count
== 0) {
912 lck_mtx_unlock(&map
->s_lock
);
916 lck_mtx_lock(&map
->s_lock
);
918 assert(map
->ref_count
>= map
->res_count
);
920 #endif /* MACH_ASSERT && TASK_SWAPPER */
925 * Actually destroy a map.
934 /* clean up regular map entries */
935 (void) vm_map_delete(map
, map
->min_offset
, map
->max_offset
,
937 /* clean up leftover special mappings (commpage, etc...) */
940 * PPC51: ppc64 is limited to 51-bit addresses.
941 * Memory beyond this 51-bit limit is mapped specially at the
942 * pmap level, so do not interfere.
943 * On PPC64, the commpage is mapped beyond the addressable range
944 * via a special pmap hack, so ask pmap to clean it explicitly...
947 pmap_unmap_sharedpage(map
->pmap
);
949 /* ... and do not let regular pmap cleanup apply here */
950 flags
|= VM_MAP_REMOVE_NO_PMAP_CLEANUP
;
952 (void) vm_map_delete(map
, 0x0, 0xFFFFFFFFFFFFF000ULL
,
956 assert(map
->hdr
.nentries
== 0);
959 pmap_destroy(map
->pmap
);
961 zfree(vm_map_zone
, map
);
966 * vm_map_swapin/vm_map_swapout
968 * Swap a map in and out, either referencing or releasing its resources.
969 * These functions are internal use only; however, they must be exported
970 * because they may be called from macros, which are exported.
972 * In the case of swapout, there could be races on the residence count,
973 * so if the residence count is up, we return, assuming that a
974 * vm_map_deallocate() call in the near future will bring us back.
977 * -- We use the map write lock for synchronization among races.
978 * -- The map write lock, and not the simple s_lock, protects the
979 * swap state of the map.
980 * -- If a map entry is a share map, then we hold both locks, in
981 * hierarchical order.
983 * Synchronization Notes:
984 * 1) If a vm_map_swapin() call happens while swapout in progress, it
985 * will block on the map lock and proceed when swapout is through.
986 * 2) A vm_map_reference() call at this time is illegal, and will
987 * cause a panic. vm_map_reference() is only allowed on resident
988 * maps, since it refuses to block.
989 * 3) A vm_map_swapin() call during a swapin will block, and
990 * proceeed when the first swapin is done, turning into a nop.
991 * This is the reason the res_count is not incremented until
992 * after the swapin is complete.
993 * 4) There is a timing hole after the checks of the res_count, before
994 * the map lock is taken, during which a swapin may get the lock
995 * before a swapout about to happen. If this happens, the swapin
996 * will detect the state and increment the reference count, causing
997 * the swapout to be a nop, thereby delaying it until a later
998 * vm_map_deallocate. If the swapout gets the lock first, then
999 * the swapin will simply block until the swapout is done, and
1002 * Because vm_map_swapin() is potentially an expensive operation, it
1003 * should be used with caution.
1006 * 1) A map with a residence count of zero is either swapped, or
1008 * 2) A map with a non-zero residence count is either resident,
1009 * or being swapped in.
1012 int vm_map_swap_enable
= 1;
1014 void vm_map_swapin (vm_map_t map
)
1016 register vm_map_entry_t entry
;
1018 if (!vm_map_swap_enable
) /* debug */
1023 * First deal with various races.
1025 if (map
->sw_state
== MAP_SW_IN
)
1027 * we raced with swapout and won. Returning will incr.
1028 * the res_count, turning the swapout into a nop.
1033 * The residence count must be zero. If we raced with another
1034 * swapin, the state would have been IN; if we raced with a
1035 * swapout (after another competing swapin), we must have lost
1036 * the race to get here (see above comment), in which case
1037 * res_count is still 0.
1039 assert(map
->res_count
== 0);
1042 * There are no intermediate states of a map going out or
1043 * coming in, since the map is locked during the transition.
1045 assert(map
->sw_state
== MAP_SW_OUT
);
1048 * We now operate upon each map entry. If the entry is a sub-
1049 * or share-map, we call vm_map_res_reference upon it.
1050 * If the entry is an object, we call vm_object_res_reference
1051 * (this may iterate through the shadow chain).
1052 * Note that we hold the map locked the entire time,
1053 * even if we get back here via a recursive call in
1054 * vm_map_res_reference.
1056 entry
= vm_map_first_entry(map
);
1058 while (entry
!= vm_map_to_entry(map
)) {
1059 if (entry
->object
.vm_object
!= VM_OBJECT_NULL
) {
1060 if (entry
->is_sub_map
) {
1061 vm_map_t lmap
= entry
->object
.sub_map
;
1062 lck_mtx_lock(&lmap
->s_lock
);
1063 vm_map_res_reference(lmap
);
1064 lck_mtx_unlock(&lmap
->s_lock
);
1066 vm_object_t object
= entry
->object
.vm_object
;
1067 vm_object_lock(object
);
1069 * This call may iterate through the
1072 vm_object_res_reference(object
);
1073 vm_object_unlock(object
);
1076 entry
= entry
->vme_next
;
1078 assert(map
->sw_state
== MAP_SW_OUT
);
1079 map
->sw_state
= MAP_SW_IN
;
1082 void vm_map_swapout(vm_map_t map
)
1084 register vm_map_entry_t entry
;
1088 * First deal with various races.
1089 * If we raced with a swapin and lost, the residence count
1090 * will have been incremented to 1, and we simply return.
1092 lck_mtx_lock(&map
->s_lock
);
1093 if (map
->res_count
!= 0) {
1094 lck_mtx_unlock(&map
->s_lock
);
1097 lck_mtx_unlock(&map
->s_lock
);
1100 * There are no intermediate states of a map going out or
1101 * coming in, since the map is locked during the transition.
1103 assert(map
->sw_state
== MAP_SW_IN
);
1105 if (!vm_map_swap_enable
)
1109 * We now operate upon each map entry. If the entry is a sub-
1110 * or share-map, we call vm_map_res_deallocate upon it.
1111 * If the entry is an object, we call vm_object_res_deallocate
1112 * (this may iterate through the shadow chain).
1113 * Note that we hold the map locked the entire time,
1114 * even if we get back here via a recursive call in
1115 * vm_map_res_deallocate.
1117 entry
= vm_map_first_entry(map
);
1119 while (entry
!= vm_map_to_entry(map
)) {
1120 if (entry
->object
.vm_object
!= VM_OBJECT_NULL
) {
1121 if (entry
->is_sub_map
) {
1122 vm_map_t lmap
= entry
->object
.sub_map
;
1123 lck_mtx_lock(&lmap
->s_lock
);
1124 vm_map_res_deallocate(lmap
);
1125 lck_mtx_unlock(&lmap
->s_lock
);
1127 vm_object_t object
= entry
->object
.vm_object
;
1128 vm_object_lock(object
);
1130 * This call may take a long time,
1131 * since it could actively push
1132 * out pages (if we implement it
1135 vm_object_res_deallocate(object
);
1136 vm_object_unlock(object
);
1139 entry
= entry
->vme_next
;
1141 assert(map
->sw_state
== MAP_SW_IN
);
1142 map
->sw_state
= MAP_SW_OUT
;
1145 #endif /* TASK_SWAPPER */
1149 * SAVE_HINT_MAP_READ:
1151 * Saves the specified entry as the hint for
1152 * future lookups. only a read lock is held on map,
1153 * so make sure the store is atomic... OSCompareAndSwap
1154 * guarantees this... also, we don't care if we collide
1155 * and someone else wins and stores their 'hint'
1157 #define SAVE_HINT_MAP_READ(map,value) \
1159 OSCompareAndSwapPtr((map)->hint, value, &(map)->hint); \
1164 * SAVE_HINT_MAP_WRITE:
1166 * Saves the specified entry as the hint for
1167 * future lookups. write lock held on map,
1168 * so no one else can be writing or looking
1169 * until the lock is dropped, so it's safe
1170 * to just do an assignment
1172 #define SAVE_HINT_MAP_WRITE(map,value) \
1174 (map)->hint = (value); \
1178 * vm_map_lookup_entry: [ internal use only ]
1180 * Finds the map entry containing (or
1181 * immediately preceding) the specified address
1182 * in the given map; the entry is returned
1183 * in the "entry" parameter. The boolean
1184 * result indicates whether the address is
1185 * actually contained in the map.
1188 vm_map_lookup_entry(
1189 register vm_map_t map
,
1190 register vm_map_offset_t address
,
1191 vm_map_entry_t
*entry
) /* OUT */
1193 register vm_map_entry_t cur
;
1194 register vm_map_entry_t last
;
1197 * Start looking either from the head of the
1198 * list, or from the hint.
1202 if (cur
== vm_map_to_entry(map
))
1203 cur
= cur
->vme_next
;
1205 if (address
>= cur
->vme_start
) {
1207 * Go from hint to end of list.
1209 * But first, make a quick check to see if
1210 * we are already looking at the entry we
1211 * want (which is usually the case).
1212 * Note also that we don't need to save the hint
1213 * here... it is the same hint (unless we are
1214 * at the header, in which case the hint didn't
1215 * buy us anything anyway).
1217 last
= vm_map_to_entry(map
);
1218 if ((cur
!= last
) && (cur
->vme_end
> address
)) {
1225 * Go from start to hint, *inclusively*
1227 last
= cur
->vme_next
;
1228 cur
= vm_map_first_entry(map
);
1235 while (cur
!= last
) {
1236 if (cur
->vme_end
> address
) {
1237 if (address
>= cur
->vme_start
) {
1239 * Save this lookup for future
1244 SAVE_HINT_MAP_READ(map
, cur
);
1250 cur
= cur
->vme_next
;
1252 *entry
= cur
->vme_prev
;
1253 SAVE_HINT_MAP_READ(map
, *entry
);
1259 * Routine: vm_map_find_space
1261 * Allocate a range in the specified virtual address map,
1262 * returning the entry allocated for that range.
1263 * Used by kmem_alloc, etc.
1265 * The map must be NOT be locked. It will be returned locked
1266 * on KERN_SUCCESS, unlocked on failure.
1268 * If an entry is allocated, the object/offset fields
1269 * are initialized to zero.
1273 register vm_map_t map
,
1274 vm_map_offset_t
*address
, /* OUT */
1276 vm_map_offset_t mask
,
1278 vm_map_entry_t
*o_entry
) /* OUT */
1280 register vm_map_entry_t entry
, new_entry
;
1281 register vm_map_offset_t start
;
1282 register vm_map_offset_t end
;
1286 return KERN_INVALID_ARGUMENT
;
1289 if (flags
& VM_FLAGS_GUARD_AFTER
) {
1290 /* account for the back guard page in the size */
1291 size
+= PAGE_SIZE_64
;
1294 new_entry
= vm_map_entry_create(map
);
1297 * Look for the first possible address; if there's already
1298 * something at this address, we have to start after it.
1303 assert(first_free_is_valid(map
));
1304 if ((entry
= map
->first_free
) == vm_map_to_entry(map
))
1305 start
= map
->min_offset
;
1307 start
= entry
->vme_end
;
1310 * In any case, the "entry" always precedes
1311 * the proposed new region throughout the loop:
1315 register vm_map_entry_t next
;
1318 * Find the end of the proposed new region.
1319 * Be sure we didn't go beyond the end, or
1320 * wrap around the address.
1323 if (flags
& VM_FLAGS_GUARD_BEFORE
) {
1324 /* reserve space for the front guard page */
1325 start
+= PAGE_SIZE_64
;
1327 end
= ((start
+ mask
) & ~mask
);
1330 vm_map_entry_dispose(map
, new_entry
);
1332 return(KERN_NO_SPACE
);
1337 if ((end
> map
->max_offset
) || (end
< start
)) {
1338 vm_map_entry_dispose(map
, new_entry
);
1340 return(KERN_NO_SPACE
);
1344 * If there are no more entries, we must win.
1347 next
= entry
->vme_next
;
1348 if (next
== vm_map_to_entry(map
))
1352 * If there is another entry, it must be
1353 * after the end of the potential new region.
1356 if (next
->vme_start
>= end
)
1360 * Didn't fit -- move to the next entry.
1364 start
= entry
->vme_end
;
1369 * "start" and "end" should define the endpoints of the
1370 * available new range, and
1371 * "entry" should refer to the region before the new
1374 * the map should be locked.
1377 if (flags
& VM_FLAGS_GUARD_BEFORE
) {
1378 /* go back for the front guard page */
1379 start
-= PAGE_SIZE_64
;
1383 new_entry
->vme_start
= start
;
1384 new_entry
->vme_end
= end
;
1385 assert(page_aligned(new_entry
->vme_start
));
1386 assert(page_aligned(new_entry
->vme_end
));
1388 new_entry
->is_shared
= FALSE
;
1389 new_entry
->is_sub_map
= FALSE
;
1390 new_entry
->use_pmap
= FALSE
;
1391 new_entry
->object
.vm_object
= VM_OBJECT_NULL
;
1392 new_entry
->offset
= (vm_object_offset_t
) 0;
1394 new_entry
->needs_copy
= FALSE
;
1396 new_entry
->inheritance
= VM_INHERIT_DEFAULT
;
1397 new_entry
->protection
= VM_PROT_DEFAULT
;
1398 new_entry
->max_protection
= VM_PROT_ALL
;
1399 new_entry
->behavior
= VM_BEHAVIOR_DEFAULT
;
1400 new_entry
->wired_count
= 0;
1401 new_entry
->user_wired_count
= 0;
1403 new_entry
->in_transition
= FALSE
;
1404 new_entry
->needs_wakeup
= FALSE
;
1405 new_entry
->no_cache
= FALSE
;
1406 new_entry
->permanent
= FALSE
;
1407 new_entry
->superpage_size
= 0;
1409 new_entry
->alias
= 0;
1410 new_entry
->zero_wired_pages
= FALSE
;
1412 VM_GET_FLAGS_ALIAS(flags
, new_entry
->alias
);
1415 * Insert the new entry into the list
1418 vm_map_entry_link(map
, entry
, new_entry
);
1423 * Update the lookup hint
1425 SAVE_HINT_MAP_WRITE(map
, new_entry
);
1427 *o_entry
= new_entry
;
1428 return(KERN_SUCCESS
);
1431 int vm_map_pmap_enter_print
= FALSE
;
1432 int vm_map_pmap_enter_enable
= FALSE
;
1435 * Routine: vm_map_pmap_enter [internal only]
1438 * Force pages from the specified object to be entered into
1439 * the pmap at the specified address if they are present.
1440 * As soon as a page not found in the object the scan ends.
1445 * In/out conditions:
1446 * The source map should not be locked on entry.
1451 register vm_map_offset_t addr
,
1452 register vm_map_offset_t end_addr
,
1453 register vm_object_t object
,
1454 vm_object_offset_t offset
,
1455 vm_prot_t protection
)
1463 while (addr
< end_addr
) {
1464 register vm_page_t m
;
1466 vm_object_lock(object
);
1468 m
= vm_page_lookup(object
, offset
);
1471 * The user should never see encrypted data, so do not
1472 * enter an encrypted page in the page table.
1474 if (m
== VM_PAGE_NULL
|| m
->busy
|| m
->encrypted
||
1476 (m
->unusual
&& ( m
->error
|| m
->restart
|| m
->absent
))) {
1477 vm_object_unlock(object
);
1481 if (vm_map_pmap_enter_print
) {
1482 printf("vm_map_pmap_enter:");
1483 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1484 map
, (unsigned long long)addr
, object
, (unsigned long long)offset
);
1486 type_of_fault
= DBG_CACHE_HIT_FAULT
;
1487 kr
= vm_fault_enter(m
, map
->pmap
, addr
, protection
,
1488 VM_PAGE_WIRED(m
), FALSE
, FALSE
,
1491 vm_object_unlock(object
);
1493 offset
+= PAGE_SIZE_64
;
1498 boolean_t
vm_map_pmap_is_empty(
1500 vm_map_offset_t start
,
1501 vm_map_offset_t end
);
1502 boolean_t
vm_map_pmap_is_empty(
1504 vm_map_offset_t start
,
1505 vm_map_offset_t end
)
1507 #ifdef MACHINE_PMAP_IS_EMPTY
1508 return pmap_is_empty(map
->pmap
, start
, end
);
1509 #else /* MACHINE_PMAP_IS_EMPTY */
1510 vm_map_offset_t offset
;
1513 if (map
->pmap
== NULL
) {
1517 for (offset
= start
;
1519 offset
+= PAGE_SIZE
) {
1520 phys_page
= pmap_find_phys(map
->pmap
, offset
);
1522 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1523 "page %d at 0x%llx\n",
1524 map
, (long long)start
, (long long)end
,
1525 phys_page
, (long long)offset
);
1530 #endif /* MACHINE_PMAP_IS_EMPTY */
1534 * Routine: vm_map_enter
1537 * Allocate a range in the specified virtual address map.
1538 * The resulting range will refer to memory defined by
1539 * the given memory object and offset into that object.
1541 * Arguments are as defined in the vm_map call.
1543 int _map_enter_debug
= 0;
1544 static unsigned int vm_map_enter_restore_successes
= 0;
1545 static unsigned int vm_map_enter_restore_failures
= 0;
1549 vm_map_offset_t
*address
, /* IN/OUT */
1551 vm_map_offset_t mask
,
1554 vm_object_offset_t offset
,
1555 boolean_t needs_copy
,
1556 vm_prot_t cur_protection
,
1557 vm_prot_t max_protection
,
1558 vm_inherit_t inheritance
)
1560 vm_map_entry_t entry
, new_entry
;
1561 vm_map_offset_t start
, tmp_start
, tmp_offset
;
1562 vm_map_offset_t end
, tmp_end
;
1563 vm_map_offset_t tmp2_start
, tmp2_end
;
1564 vm_map_offset_t step
;
1565 kern_return_t result
= KERN_SUCCESS
;
1566 vm_map_t zap_old_map
= VM_MAP_NULL
;
1567 vm_map_t zap_new_map
= VM_MAP_NULL
;
1568 boolean_t map_locked
= FALSE
;
1569 boolean_t pmap_empty
= TRUE
;
1570 boolean_t new_mapping_established
= FALSE
;
1571 boolean_t anywhere
= ((flags
& VM_FLAGS_ANYWHERE
) != 0);
1572 boolean_t purgable
= ((flags
& VM_FLAGS_PURGABLE
) != 0);
1573 boolean_t overwrite
= ((flags
& VM_FLAGS_OVERWRITE
) != 0);
1574 boolean_t no_cache
= ((flags
& VM_FLAGS_NO_CACHE
) != 0);
1575 boolean_t is_submap
= ((flags
& VM_FLAGS_SUBMAP
) != 0);
1576 boolean_t permanent
= ((flags
& VM_FLAGS_PERMANENT
) != 0);
1577 unsigned int superpage_size
= ((flags
& VM_FLAGS_SUPERPAGE_MASK
) >> VM_FLAGS_SUPERPAGE_SHIFT
);
1579 vm_map_offset_t effective_min_offset
, effective_max_offset
;
1582 if (superpage_size
) {
1583 switch (superpage_size
) {
1585 * Note that the current implementation only supports
1586 * a single size for superpages, SUPERPAGE_SIZE, per
1587 * architecture. As soon as more sizes are supposed
1588 * to be supported, SUPERPAGE_SIZE has to be replaced
1589 * with a lookup of the size depending on superpage_size.
1592 case SUPERPAGE_SIZE_2MB
:
1596 return KERN_INVALID_ARGUMENT
;
1598 mask
= SUPERPAGE_SIZE
-1;
1599 if (size
& (SUPERPAGE_SIZE
-1))
1600 return KERN_INVALID_ARGUMENT
;
1601 inheritance
= VM_INHERIT_NONE
; /* fork() children won't inherit superpages */
1605 if (cur_protection
& VM_PROT_WRITE
) {
1606 if (cur_protection
& VM_PROT_EXECUTE
) {
1607 printf("EMBEDDED: %s curprot cannot be write+execute. turning off execute\n", __PRETTY_FUNCTION__
);
1608 cur_protection
&= ~VM_PROT_EXECUTE
;
1611 #endif /* CONFIG_EMBEDDED */
1615 /* submaps can not be purgeable */
1616 return KERN_INVALID_ARGUMENT
;
1618 if (object
== VM_OBJECT_NULL
) {
1619 /* submaps can not be created lazily */
1620 return KERN_INVALID_ARGUMENT
;
1623 if (flags
& VM_FLAGS_ALREADY
) {
1625 * VM_FLAGS_ALREADY says that it's OK if the same mapping
1626 * is already present. For it to be meaningul, the requested
1627 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
1628 * we shouldn't try and remove what was mapped there first
1629 * (!VM_FLAGS_OVERWRITE).
1631 if ((flags
& VM_FLAGS_ANYWHERE
) ||
1632 (flags
& VM_FLAGS_OVERWRITE
)) {
1633 return KERN_INVALID_ARGUMENT
;
1637 if (flags
& VM_FLAGS_BELOW_MIN
) {
1639 * Allow an insertion below the map's min offset.
1641 effective_min_offset
= 0ULL;
1643 effective_min_offset
= map
->min_offset
;
1646 if (flags
& VM_FLAGS_BEYOND_MAX
) {
1648 * Allow an insertion beyond the map's max offset.
1650 if (vm_map_is_64bit(map
))
1651 effective_max_offset
= 0xFFFFFFFFFFFFF000ULL
;
1653 effective_max_offset
= 0x00000000FFFFF000ULL
;
1655 effective_max_offset
= map
->max_offset
;
1659 (offset
& PAGE_MASK_64
) != 0) {
1661 return KERN_INVALID_ARGUMENT
;
1664 VM_GET_FLAGS_ALIAS(flags
, alias
);
1666 #define RETURN(value) { result = value; goto BailOut; }
1668 assert(page_aligned(*address
));
1669 assert(page_aligned(size
));
1672 * Only zero-fill objects are allowed to be purgable.
1673 * LP64todo - limit purgable objects to 32-bits for now
1677 (object
!= VM_OBJECT_NULL
&&
1678 (object
->size
!= size
||
1679 object
->purgable
== VM_PURGABLE_DENY
))
1680 || size
> ANON_MAX_SIZE
)) /* LP64todo: remove when dp capable */
1681 return KERN_INVALID_ARGUMENT
;
1683 if (!anywhere
&& overwrite
) {
1685 * Create a temporary VM map to hold the old mappings in the
1686 * affected area while we create the new one.
1687 * This avoids releasing the VM map lock in
1688 * vm_map_entry_delete() and allows atomicity
1689 * when we want to replace some mappings with a new one.
1690 * It also allows us to restore the old VM mappings if the
1691 * new mapping fails.
1693 zap_old_map
= vm_map_create(PMAP_NULL
,
1696 map
->hdr
.entries_pageable
);
1708 * Calculate the first possible address.
1711 if (start
< effective_min_offset
)
1712 start
= effective_min_offset
;
1713 if (start
> effective_max_offset
)
1714 RETURN(KERN_NO_SPACE
);
1717 * Look for the first possible address;
1718 * if there's already something at this
1719 * address, we have to start after it.
1722 assert(first_free_is_valid(map
));
1723 if (start
== effective_min_offset
) {
1724 if ((entry
= map
->first_free
) != vm_map_to_entry(map
))
1725 start
= entry
->vme_end
;
1727 vm_map_entry_t tmp_entry
;
1728 if (vm_map_lookup_entry(map
, start
, &tmp_entry
))
1729 start
= tmp_entry
->vme_end
;
1734 * In any case, the "entry" always precedes
1735 * the proposed new region throughout the
1740 register vm_map_entry_t next
;
1743 * Find the end of the proposed new region.
1744 * Be sure we didn't go beyond the end, or
1745 * wrap around the address.
1748 end
= ((start
+ mask
) & ~mask
);
1750 RETURN(KERN_NO_SPACE
);
1754 if ((end
> effective_max_offset
) || (end
< start
)) {
1755 if (map
->wait_for_space
) {
1756 if (size
<= (effective_max_offset
-
1757 effective_min_offset
)) {
1758 assert_wait((event_t
)map
,
1762 thread_block(THREAD_CONTINUE_NULL
);
1766 RETURN(KERN_NO_SPACE
);
1770 * If there are no more entries, we must win.
1773 next
= entry
->vme_next
;
1774 if (next
== vm_map_to_entry(map
))
1778 * If there is another entry, it must be
1779 * after the end of the potential new region.
1782 if (next
->vme_start
>= end
)
1786 * Didn't fit -- move to the next entry.
1790 start
= entry
->vme_end
;
1796 * the address doesn't itself violate
1797 * the mask requirement.
1802 if ((start
& mask
) != 0)
1803 RETURN(KERN_NO_SPACE
);
1806 * ... the address is within bounds
1811 if ((start
< effective_min_offset
) ||
1812 (end
> effective_max_offset
) ||
1814 RETURN(KERN_INVALID_ADDRESS
);
1817 if (overwrite
&& zap_old_map
!= VM_MAP_NULL
) {
1819 * Fixed mapping and "overwrite" flag: attempt to
1820 * remove all existing mappings in the specified
1821 * address range, saving them in our "zap_old_map".
1823 (void) vm_map_delete(map
, start
, end
,
1824 VM_MAP_REMOVE_SAVE_ENTRIES
,
1829 * ... the starting address isn't allocated
1832 if (vm_map_lookup_entry(map
, start
, &entry
)) {
1833 if (! (flags
& VM_FLAGS_ALREADY
)) {
1834 RETURN(KERN_NO_SPACE
);
1837 * Check if what's already there is what we want.
1840 tmp_offset
= offset
;
1841 if (entry
->vme_start
< start
) {
1842 tmp_start
-= start
- entry
->vme_start
;
1843 tmp_offset
-= start
- entry
->vme_start
;
1846 for (; entry
->vme_start
< end
;
1847 entry
= entry
->vme_next
) {
1849 * Check if the mapping's attributes
1850 * match the existing map entry.
1852 if (entry
== vm_map_to_entry(map
) ||
1853 entry
->vme_start
!= tmp_start
||
1854 entry
->is_sub_map
!= is_submap
||
1855 entry
->offset
!= tmp_offset
||
1856 entry
->needs_copy
!= needs_copy
||
1857 entry
->protection
!= cur_protection
||
1858 entry
->max_protection
!= max_protection
||
1859 entry
->inheritance
!= inheritance
||
1860 entry
->alias
!= alias
) {
1861 /* not the same mapping ! */
1862 RETURN(KERN_NO_SPACE
);
1865 * Check if the same object is being mapped.
1868 if (entry
->object
.sub_map
!=
1869 (vm_map_t
) object
) {
1870 /* not the same submap */
1871 RETURN(KERN_NO_SPACE
);
1874 if (entry
->object
.vm_object
!= object
) {
1875 /* not the same VM object... */
1878 obj2
= entry
->object
.vm_object
;
1879 if ((obj2
== VM_OBJECT_NULL
||
1881 (object
== VM_OBJECT_NULL
||
1882 object
->internal
)) {
1889 RETURN(KERN_NO_SPACE
);
1894 tmp_offset
+= entry
->vme_end
- entry
->vme_start
;
1895 tmp_start
+= entry
->vme_end
- entry
->vme_start
;
1896 if (entry
->vme_end
>= end
) {
1897 /* reached the end of our mapping */
1901 /* it all matches: let's use what's already there ! */
1902 RETURN(KERN_MEMORY_PRESENT
);
1906 * ... the next region doesn't overlap the
1910 if ((entry
->vme_next
!= vm_map_to_entry(map
)) &&
1911 (entry
->vme_next
->vme_start
< end
))
1912 RETURN(KERN_NO_SPACE
);
1917 * "start" and "end" should define the endpoints of the
1918 * available new range, and
1919 * "entry" should refer to the region before the new
1922 * the map should be locked.
1926 * See whether we can avoid creating a new entry (and object) by
1927 * extending one of our neighbors. [So far, we only attempt to
1928 * extend from below.] Note that we can never extend/join
1929 * purgable objects because they need to remain distinct
1930 * entities in order to implement their "volatile object"
1935 if (object
== VM_OBJECT_NULL
) {
1936 object
= vm_object_allocate(size
);
1937 object
->copy_strategy
= MEMORY_OBJECT_COPY_NONE
;
1938 object
->purgable
= VM_PURGABLE_NONVOLATILE
;
1939 offset
= (vm_object_offset_t
)0;
1941 } else if ((is_submap
== FALSE
) &&
1942 (object
== VM_OBJECT_NULL
) &&
1943 (entry
!= vm_map_to_entry(map
)) &&
1944 (entry
->vme_end
== start
) &&
1945 (!entry
->is_shared
) &&
1946 (!entry
->is_sub_map
) &&
1947 (entry
->alias
== alias
) &&
1948 (entry
->inheritance
== inheritance
) &&
1949 (entry
->protection
== cur_protection
) &&
1950 (entry
->max_protection
== max_protection
) &&
1951 (entry
->behavior
== VM_BEHAVIOR_DEFAULT
) &&
1952 (entry
->in_transition
== 0) &&
1953 (entry
->no_cache
== no_cache
) &&
1954 ((entry
->vme_end
- entry
->vme_start
) + size
<=
1955 (alias
== VM_MEMORY_REALLOC
?
1957 NO_COALESCE_LIMIT
)) &&
1958 (entry
->wired_count
== 0)) { /* implies user_wired_count == 0 */
1959 if (vm_object_coalesce(entry
->object
.vm_object
,
1962 (vm_object_offset_t
) 0,
1963 (vm_map_size_t
)(entry
->vme_end
- entry
->vme_start
),
1964 (vm_map_size_t
)(end
- entry
->vme_end
))) {
1967 * Coalesced the two objects - can extend
1968 * the previous map entry to include the
1971 map
->size
+= (end
- entry
->vme_end
);
1972 entry
->vme_end
= end
;
1973 UPDATE_FIRST_FREE(map
, map
->first_free
);
1974 RETURN(KERN_SUCCESS
);
1978 step
= superpage_size
? SUPERPAGE_SIZE
: (end
- start
);
1981 for (tmp2_start
= start
; tmp2_start
<end
; tmp2_start
+= step
) {
1982 tmp2_end
= tmp2_start
+ step
;
1984 * Create a new entry
1985 * LP64todo - for now, we can only allocate 4GB internal objects
1986 * because the default pager can't page bigger ones. Remove this
1990 * The reserved "page zero" in each process's address space can
1991 * be arbitrarily large. Splitting it into separate 4GB objects and
1992 * therefore different VM map entries serves no purpose and just
1993 * slows down operations on the VM map, so let's not split the
1994 * allocation into 4GB chunks if the max protection is NONE. That
1995 * memory should never be accessible, so it will never get to the
1998 tmp_start
= tmp2_start
;
1999 if (object
== VM_OBJECT_NULL
&&
2000 size
> (vm_map_size_t
)ANON_CHUNK_SIZE
&&
2001 max_protection
!= VM_PROT_NONE
&&
2002 superpage_size
== 0)
2003 tmp_end
= tmp_start
+ (vm_map_size_t
)ANON_CHUNK_SIZE
;
2007 new_entry
= vm_map_entry_insert(map
, entry
, tmp_start
, tmp_end
,
2008 object
, offset
, needs_copy
,
2010 cur_protection
, max_protection
,
2011 VM_BEHAVIOR_DEFAULT
,
2012 inheritance
, 0, no_cache
,
2013 permanent
, superpage_size
);
2014 new_entry
->alias
= alias
;
2017 boolean_t submap_is_64bit
;
2020 new_entry
->is_sub_map
= TRUE
;
2021 submap
= (vm_map_t
) object
;
2022 submap_is_64bit
= vm_map_is_64bit(submap
);
2023 use_pmap
= (alias
== VM_MEMORY_SHARED_PMAP
);
2024 #ifndef NO_NESTED_PMAP
2025 if (use_pmap
&& submap
->pmap
== NULL
) {
2026 /* we need a sub pmap to nest... */
2027 submap
->pmap
= pmap_create(0, submap_is_64bit
);
2028 if (submap
->pmap
== NULL
) {
2029 /* let's proceed without nesting... */
2032 if (use_pmap
&& submap
->pmap
!= NULL
) {
2033 kr
= pmap_nest(map
->pmap
,
2037 tmp_end
- tmp_start
);
2038 if (kr
!= KERN_SUCCESS
) {
2039 printf("vm_map_enter: "
2040 "pmap_nest(0x%llx,0x%llx) "
2042 (long long)tmp_start
,
2046 /* we're now nested ! */
2047 new_entry
->use_pmap
= TRUE
;
2051 #endif /* NO_NESTED_PMAP */
2055 if (superpage_size
) {
2057 vm_object_t sp_object
;
2061 /* allocate one superpage */
2062 kr
= cpm_allocate(SUPERPAGE_SIZE
, &pages
, 0, SUPERPAGE_NBASEPAGES
-1, TRUE
, 0);
2063 if (kr
!= KERN_SUCCESS
) {
2064 new_mapping_established
= TRUE
; /* will cause deallocation of whole range */
2068 /* create one vm_object per superpage */
2069 sp_object
= vm_object_allocate((vm_map_size_t
)(entry
->vme_end
- entry
->vme_start
));
2070 sp_object
->phys_contiguous
= TRUE
;
2071 sp_object
->shadow_offset
= (vm_object_offset_t
)pages
->phys_page
*PAGE_SIZE
;
2072 entry
->object
.vm_object
= sp_object
;
2074 /* enter the base pages into the object */
2075 vm_object_lock(sp_object
);
2076 for (offset
= 0; offset
< SUPERPAGE_SIZE
; offset
+= PAGE_SIZE
) {
2078 pmap_zero_page(m
->phys_page
);
2079 pages
= NEXT_PAGE(m
);
2080 *(NEXT_PAGE_PTR(m
)) = VM_PAGE_NULL
;
2081 vm_page_insert(m
, sp_object
, offset
);
2083 vm_object_unlock(sp_object
);
2085 } while (tmp_end
!= tmp2_end
&&
2086 (tmp_start
= tmp_end
) &&
2087 (tmp_end
= (tmp2_end
- tmp_end
> (vm_map_size_t
)ANON_CHUNK_SIZE
) ?
2088 tmp_end
+ (vm_map_size_t
)ANON_CHUNK_SIZE
: tmp2_end
));
2094 new_mapping_established
= TRUE
;
2096 /* Wire down the new entry if the user
2097 * requested all new map entries be wired.
2099 if ((map
->wiring_required
)||(superpage_size
)) {
2100 pmap_empty
= FALSE
; /* pmap won't be empty */
2101 result
= vm_map_wire(map
, start
, end
,
2102 new_entry
->protection
, TRUE
);
2106 if ((object
        != VM_OBJECT_NULL) &&
        (vm_map_pmap_enter_enable) &&
        (size < (128*1024))) {
        pmap_empty = FALSE; /* pmap won't be empty */

        if (override_nx(map, alias) && cur_protection)
            cur_protection |= VM_PROT_EXECUTE;

        vm_map_pmap_enter(map, start, end,
                          object, offset, cur_protection);

    if (result == KERN_SUCCESS) {
        vm_prot_t pager_prot;
        memory_object_t pager;

            !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
            assert(vm_map_pmap_is_empty(map,

        /*
         * For "named" VM objects, let the pager know that the
         * memory object is being mapped.  Some pagers need to keep
         * track of this, to know when they can reclaim the memory
         * object, for example.
         * VM calls memory_object_map() for each mapping (specifying
         * the protection of each mapping) and calls
         * memory_object_last_unmap() when all the mappings are gone.
         */
        pager_prot = max_protection;
            /*
             * Copy-On-Write mapping: won't modify
             * the memory object.
             */
            pager_prot &= ~VM_PROT_WRITE;
            object != VM_OBJECT_NULL &&
            object->pager != MEMORY_OBJECT_NULL) {
            vm_object_lock(object);
            pager = object->pager;
            if (object->named &&
                pager != MEMORY_OBJECT_NULL) {
                assert(object->pager_ready);
                vm_object_mapping_wait(object, THREAD_UNINT);
                vm_object_mapping_begin(object);
                vm_object_unlock(object);

                kr = memory_object_map(pager, pager_prot);
                assert(kr == KERN_SUCCESS);

                vm_object_lock(object);
                vm_object_mapping_end(object);
            vm_object_unlock(object);

    if (new_mapping_established) {
        /*
         * We have to get rid of the new mappings since we
         * won't make them available to the user.
         * Try and do that atomically, to minimize the risk
         * that someone else creates new mappings in that range.
         */
        zap_new_map = vm_map_create(PMAP_NULL,
                                    map->hdr.entries_pageable);
        (void) vm_map_delete(map, *address, *address+size,
                             VM_MAP_REMOVE_SAVE_ENTRIES,
        if (zap_old_map != VM_MAP_NULL &&
            zap_old_map->hdr.nentries != 0) {
            vm_map_entry_t entry1, entry2;

            /*
             * The new mapping failed.  Attempt to restore
             * the old mappings, saved in the "zap_old_map".
             */

            /* first check if the coast is still clear */
            start = vm_map_first_entry(zap_old_map)->vme_start;
            end = vm_map_last_entry(zap_old_map)->vme_end;
            if (vm_map_lookup_entry(map, start, &entry1) ||
                vm_map_lookup_entry(map, end, &entry2) ||
                /*
                 * Part of that range has already been
                 * re-mapped:  we can't restore the old
                 */
                vm_map_enter_restore_failures++;
                /*
                 * Transfer the saved map entries from
                 * "zap_old_map" to the original "map",
                 * inserting them all after "entry1".
                 */
                for (entry2 = vm_map_first_entry(zap_old_map);
                     entry2 != vm_map_to_entry(zap_old_map);
                     entry2 = vm_map_first_entry(zap_old_map)) {
                    vm_map_size_t entry_size;

                    entry_size = (entry2->vme_end -
                    vm_map_entry_unlink(zap_old_map,
                    zap_old_map->size -= entry_size;
                    vm_map_entry_link(map, entry1, entry2);
                    map->size += entry_size;
                if (map->wiring_required) {
                    /*
                     * XXX TODO: we should rewire the
                     */
                vm_map_enter_restore_successes++;

    /*
     * Get rid of the "zap_maps" and all the map entries that
     * they may still contain.
     */
    if (zap_old_map != VM_MAP_NULL) {
        vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
        zap_old_map = VM_MAP_NULL;
    if (zap_new_map != VM_MAP_NULL) {
        vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
        zap_new_map = VM_MAP_NULL;
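
/*
 * Added note (not from the original sources): the routine that follows,
 * vm_map_enter_mem_object(), resolves a Mach port into the VM object (or
 * submap) it names -- a null port means anonymous zero-fill memory, an
 * IKOT_NAMED_ENTRY port wraps a named entry, and an IKOT_MEMORY_OBJECT
 * port is treated as a raw memory object -- and then forwards the request
 * to vm_map_enter() with a page-rounded address and size.
 */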
vm_map_enter_mem_object(
    vm_map_t            target_map,
    vm_map_offset_t     *address,
    vm_map_size_t       initial_size,
    vm_map_offset_t     mask,
    vm_object_offset_t  offset,
    vm_prot_t           cur_protection,
    vm_prot_t           max_protection,
    vm_inherit_t        inheritance)
    vm_map_address_t    map_addr;
    vm_map_size_t       map_size;
    vm_object_size_t    size;
    kern_return_t       result;

    /*
     * Check arguments for validity
     */
    if ((target_map == VM_MAP_NULL) ||
        (cur_protection & ~VM_PROT_ALL) ||
        (max_protection & ~VM_PROT_ALL) ||
        (inheritance > VM_INHERIT_LAST_VALID) ||
        return KERN_INVALID_ARGUMENT;

    map_addr = vm_map_trunc_page(*address);
    map_size = vm_map_round_page(initial_size);
    size = vm_object_round_page(initial_size);

    /*
     * Find the vm object (if any) corresponding to this port.
     */
    if (!IP_VALID(port)) {
        object = VM_OBJECT_NULL;
    } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
        vm_named_entry_t named_entry;

        named_entry = (vm_named_entry_t) port->ip_kobject;
        /* a few checks to make sure user is obeying rules */
        if (offset >= named_entry->size)
            return KERN_INVALID_RIGHT;
        size = named_entry->size - offset;
        if ((named_entry->protection & max_protection) !=
            return KERN_INVALID_RIGHT;
        if ((named_entry->protection & cur_protection) !=
            return KERN_INVALID_RIGHT;
        if (named_entry->size < (offset + size))
            return KERN_INVALID_ARGUMENT;

        /* the caller's parameter offset is defined to be the */
        /* offset from beginning of named entry offset in object */
        offset = offset + named_entry->offset;

        named_entry_lock(named_entry);
        if (named_entry->is_sub_map) {
            submap = named_entry->backing.map;
            vm_map_lock(submap);
            vm_map_reference(submap);
            vm_map_unlock(submap);
            named_entry_unlock(named_entry);

            result = vm_map_enter(target_map,
                                  flags | VM_FLAGS_SUBMAP,
                                  (vm_object_t) submap,
            if (result != KERN_SUCCESS) {
                vm_map_deallocate(submap);
                /*
                 * No need to lock "submap" just to check its
                 * "mapped" flag: that flag is never reset
                 * once it's been set and if we race, we'll
                 * just end up setting it twice, which is OK.
                 */
                if (submap->mapped == FALSE) {
                    /*
                     * This submap has never been mapped.
                     * Set its "mapped" flag now that it
                     * This happens only for the first ever
                     * mapping of a "submap".
                     */
                    vm_map_lock(submap);
                    submap->mapped = TRUE;
                    vm_map_unlock(submap);
                *address = map_addr;

        } else if (named_entry->is_pager) {
            unsigned int    access;
            vm_prot_t       protections;
            unsigned int    wimg_mode;
            boolean_t       cache_attr;

            protections = named_entry->protection & VM_PROT_ALL;
            access = GET_MAP_MEM(named_entry->protection);

            object = vm_object_enter(named_entry->backing.pager,
                                     named_entry->internal,
            if (object == VM_OBJECT_NULL) {
                named_entry_unlock(named_entry);
                return KERN_INVALID_OBJECT;

            /* JMM - drop reference on pager here */

            /* create an extra ref for the named entry */
            vm_object_lock(object);
            vm_object_reference_locked(object);
            named_entry->backing.object = object;
            named_entry->is_pager = FALSE;
            named_entry_unlock(named_entry);

            wimg_mode = object->wimg_bits;
            if (access == MAP_MEM_IO) {
                wimg_mode = VM_WIMG_IO;
            } else if (access == MAP_MEM_COPYBACK) {
                wimg_mode = VM_WIMG_USE_DEFAULT;
            } else if (access == MAP_MEM_WTHRU) {
                wimg_mode = VM_WIMG_WTHRU;
            } else if (access == MAP_MEM_WCOMB) {
                wimg_mode = VM_WIMG_WCOMB;
            if (wimg_mode == VM_WIMG_IO ||
                wimg_mode == VM_WIMG_WCOMB)

            /* wait for object (if any) to be ready */
            if (!named_entry->internal) {
                while (!object->pager_ready) {
                                   VM_OBJECT_EVENT_PAGER_READY,
                    vm_object_lock(object);

            if (object->wimg_bits != wimg_mode) {
                vm_object_paging_wait(object, THREAD_UNINT);

                object->wimg_bits = wimg_mode;
                queue_iterate(&object->memq, p, vm_page_t, listq) {
                    if (!p->fictitious) {
                        pmap_disconnect(p->phys_page);
                        pmap_sync_page_attributes_phys(p->phys_page);

            object->true_share = TRUE;
            if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
                object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
            vm_object_unlock(object);

            /* This is the case where we are going to map */
            /* an already mapped object.  If the object is */
            /* not ready it is internal.  An external */
            /* object cannot be mapped until it is ready */
            /* we can therefore avoid the ready check */
            object = named_entry->backing.object;
            assert(object != VM_OBJECT_NULL);
            named_entry_unlock(named_entry);
            vm_object_reference(object);

    } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
        /*
         * JMM - This is temporary until we unify named entries
         * and raw memory objects.
         *
         * Detected fake ip_kotype for a memory object.  In
         * this case, the port isn't really a port at all, but
         * instead is just a raw memory object.
         */
        object = vm_object_enter((memory_object_t)port,
                                 size, FALSE, FALSE, FALSE);
        if (object == VM_OBJECT_NULL)
            return KERN_INVALID_OBJECT;

        /* wait for object (if any) to be ready */
        if (object != VM_OBJECT_NULL) {
            if (object == kernel_object) {
                printf("Warning: Attempt to map kernel object"
                       " by a non-private kernel entity\n");
                return KERN_INVALID_OBJECT;
            if (!object->pager_ready) {
                vm_object_lock(object);
                while (!object->pager_ready) {
                    vm_object_wait(object,
                                   VM_OBJECT_EVENT_PAGER_READY,
                    vm_object_lock(object);
                vm_object_unlock(object);

        return KERN_INVALID_OBJECT;

    if (object != VM_OBJECT_NULL &&
        object->pager != MEMORY_OBJECT_NULL &&
        object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
        memory_object_t pager;
        vm_prot_t       pager_prot;

        /*
         * For "named" VM objects, let the pager know that the
         * memory object is being mapped.  Some pagers need to keep
         * track of this, to know when they can reclaim the memory
         * object, for example.
         * VM calls memory_object_map() for each mapping (specifying
         * the protection of each mapping) and calls
         * memory_object_last_unmap() when all the mappings are gone.
         */
        pager_prot = max_protection;
            /*
             * Copy-On-Write mapping: won't modify the
             */
            pager_prot &= ~VM_PROT_WRITE;
        vm_object_lock(object);
        pager = object->pager;
        if (object->named &&
            pager != MEMORY_OBJECT_NULL &&
            object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
            assert(object->pager_ready);
            vm_object_mapping_wait(object, THREAD_UNINT);
            vm_object_mapping_begin(object);
            vm_object_unlock(object);

            kr = memory_object_map(pager, pager_prot);
            assert(kr == KERN_SUCCESS);

            vm_object_lock(object);
            vm_object_mapping_end(object);
        vm_object_unlock(object);

    /*
     * Perform the copy if requested
     */
        vm_object_t         new_object;
        vm_object_offset_t  new_offset;

        result = vm_object_copy_strategically(object, offset, size,
                                              &new_object, &new_offset,
        if (result == KERN_MEMORY_RESTART_COPY) {
            boolean_t src_needs_copy;

            /*
             * We currently ignore src_needs_copy.
             * This really is the issue of how to make
             * MEMORY_OBJECT_COPY_SYMMETRIC safe for
             * non-kernel users to use. Solution forthcoming.
             * In the meantime, since we don't allow non-kernel
             * memory managers to specify symmetric copy,
             * we won't run into problems here.
             */
            new_object = object;
            new_offset = offset;
            success = vm_object_copy_quickly(&new_object,
            result = KERN_SUCCESS;

        /*
         * Throw away the reference to the
         * original object, as it won't be mapped.
         */
        vm_object_deallocate(object);

        if (result != KERN_SUCCESS)

        object = new_object;
        offset = new_offset;

    result = vm_map_enter(target_map,
                          &map_addr, map_size,
                          (vm_map_offset_t)mask,
                          cur_protection, max_protection, inheritance);
    if (result != KERN_SUCCESS)
        vm_object_deallocate(object);
    *address = map_addr;
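
/*
 * Added note: vm_map_enter_mem_object_control() is the sibling of the
 * routine above; instead of a port it takes a memory_object_control_t that
 * is resolved directly to a VM object via
 * memory_object_control_to_vm_object(), after which the mapping path
 * (pager notification, optional copy, vm_map_enter) is the same.
 */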
vm_map_enter_mem_object_control(
    vm_map_t                target_map,
    vm_map_offset_t         *address,
    vm_map_size_t           initial_size,
    vm_map_offset_t         mask,
    memory_object_control_t control,
    vm_object_offset_t      offset,
    vm_prot_t               cur_protection,
    vm_prot_t               max_protection,
    vm_inherit_t            inheritance)
    vm_map_address_t    map_addr;
    vm_map_size_t       map_size;
    vm_object_size_t    size;
    kern_return_t       result;
    memory_object_t     pager;
    vm_prot_t           pager_prot;

    /*
     * Check arguments for validity
     */
    if ((target_map == VM_MAP_NULL) ||
        (cur_protection & ~VM_PROT_ALL) ||
        (max_protection & ~VM_PROT_ALL) ||
        (inheritance > VM_INHERIT_LAST_VALID) ||
        return KERN_INVALID_ARGUMENT;

    map_addr = vm_map_trunc_page(*address);
    map_size = vm_map_round_page(initial_size);
    size = vm_object_round_page(initial_size);

    object = memory_object_control_to_vm_object(control);

    if (object == VM_OBJECT_NULL)
        return KERN_INVALID_OBJECT;

    if (object == kernel_object) {
        printf("Warning: Attempt to map kernel object"
               " by a non-private kernel entity\n");
        return KERN_INVALID_OBJECT;

    vm_object_lock(object);
    object->ref_count++;
    vm_object_res_reference(object);

    /*
     * For "named" VM objects, let the pager know that the
     * memory object is being mapped.  Some pagers need to keep
     * track of this, to know when they can reclaim the memory
     * object, for example.
     * VM calls memory_object_map() for each mapping (specifying
     * the protection of each mapping) and calls
     * memory_object_last_unmap() when all the mappings are gone.
     */
    pager_prot = max_protection;
        pager_prot &= ~VM_PROT_WRITE;
    pager = object->pager;
    if (object->named &&
        pager != MEMORY_OBJECT_NULL &&
        object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
        assert(object->pager_ready);
        vm_object_mapping_wait(object, THREAD_UNINT);
        vm_object_mapping_begin(object);
        vm_object_unlock(object);

        kr = memory_object_map(pager, pager_prot);
        assert(kr == KERN_SUCCESS);

        vm_object_lock(object);
        vm_object_mapping_end(object);
    vm_object_unlock(object);

    /*
     * Perform the copy if requested
     */
        vm_object_t         new_object;
        vm_object_offset_t  new_offset;

        result = vm_object_copy_strategically(object, offset, size,
                                              &new_object, &new_offset,
        if (result == KERN_MEMORY_RESTART_COPY) {
            boolean_t src_needs_copy;

            /*
             * We currently ignore src_needs_copy.
             * This really is the issue of how to make
             * MEMORY_OBJECT_COPY_SYMMETRIC safe for
             * non-kernel users to use. Solution forthcoming.
             * In the meantime, since we don't allow non-kernel
             * memory managers to specify symmetric copy,
             * we won't run into problems here.
             */
            new_object = object;
            new_offset = offset;
            success = vm_object_copy_quickly(&new_object,
            result = KERN_SUCCESS;

        /*
         * Throw away the reference to the
         * original object, as it won't be mapped.
         */
        vm_object_deallocate(object);

        if (result != KERN_SUCCESS)

        object = new_object;
        offset = new_offset;

    result = vm_map_enter(target_map,
                          &map_addr, map_size,
                          (vm_map_offset_t)mask,
                          cur_protection, max_protection, inheritance);
    if (result != KERN_SUCCESS)
        vm_object_deallocate(object);
    *address = map_addr;
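
/*
 * Added note: the CPM ("contiguous physical memory") allocator below pulls
 * wired, physically contiguous pages from cpm_allocate(), inserts them into
 * a fresh VM object, maps that object into the target map, and pre-enters
 * every page in the pmap so the range never takes a fault.
 */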
extern pmap_paddr_t avail_start, avail_end;

/*
 *  Allocate memory in the specified map, with the caveat that
 *  the memory is physically contiguous.  This call may fail
 *  if the system can't find sufficient contiguous memory.
 *  This call may cause or lead to heart-stopping amounts of
 *  paging activity.
 *
 *  Memory obtained from this call should be freed in the
 *  normal way, viz., via vm_deallocate.
 */
kern_return_t
vm_allocate_cpm(
    vm_map_offset_t *addr,
    vm_object_t     cpm_obj;
    vm_map_offset_t va, start, end, offset;
    vm_map_offset_t prev_addr;
#endif  /* MACH_ASSERT */

    boolean_t       anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);

    if (!vm_allocate_cpm_enabled)
        return KERN_FAILURE;

        return KERN_SUCCESS;

        *addr = vm_map_min(map);
    *addr = vm_map_trunc_page(*addr);
    size = vm_map_round_page(size);

    /*
     * LP64todo - cpm_allocate should probably allow
     * allocations of >4GB, but not with the current
     * algorithm, so just cast down the size for now.
     */
    if (size > VM_MAX_ADDRESS)
        return KERN_RESOURCE_SHORTAGE;
    if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
                           &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)

    cpm_obj = vm_object_allocate((vm_object_size_t)size);
    assert(cpm_obj != VM_OBJECT_NULL);
    assert(cpm_obj->internal);
    assert(cpm_obj->size == (vm_object_size_t)size);
    assert(cpm_obj->can_persist == FALSE);
    assert(cpm_obj->pager_created == FALSE);
    assert(cpm_obj->pageout == FALSE);
    assert(cpm_obj->shadow == VM_OBJECT_NULL);

    /*
     * Insert pages into object.
     */
    vm_object_lock(cpm_obj);
    for (offset = 0; offset < size; offset += PAGE_SIZE) {
        pages = NEXT_PAGE(m);
        *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;

        assert(!m->gobbled);
        assert(!m->pageout);
        assert(VM_PAGE_WIRED(m));
        /*
         * "m" is not supposed to be pageable, so it
         * should not be encrypted.  It wouldn't be safe
         * to enter it in a new VM object while encrypted.
         */
        ASSERT_PAGE_DECRYPTED(m);
        assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT));

        vm_page_insert(m, cpm_obj, offset);
    assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
    vm_object_unlock(cpm_obj);

    /*
     * Hang onto a reference on the object in case a
     * multi-threaded application for some reason decides
     * to deallocate the portion of the address space into
     * which we will insert this object.
     *
     * Unfortunately, we must insert the object now before
     * we can talk to the pmap module about which addresses
     * must be wired down.  Hence, the race with a multi-
     */
    vm_object_reference(cpm_obj);

    /*
     * Insert object into map.
     */
                      (vm_object_offset_t)0,
                      VM_INHERIT_DEFAULT);

    if (kr != KERN_SUCCESS) {
        /*
         * A CPM object doesn't have can_persist set,
         * so all we have to do is deallocate it to
         * free up these pages.
         */
        assert(cpm_obj->pager_created == FALSE);
        assert(cpm_obj->can_persist == FALSE);
        assert(cpm_obj->pageout == FALSE);
        assert(cpm_obj->shadow == VM_OBJECT_NULL);
        vm_object_deallocate(cpm_obj); /* kill acquired ref */
        vm_object_deallocate(cpm_obj); /* kill creation ref */

    /*
     * Inform the physical mapping system that the
     * range of addresses may not fault, so that
     * page tables and such can be locked down as well.
     */
    pmap = vm_map_pmap(map);
    pmap_pageable(pmap, start, end, FALSE);

    /*
     * Enter each page into the pmap, to avoid faults.
     * Note that this loop could be coded more efficiently,
     * if the need arose, rather than looking up each page
     */
    for (offset = 0, va = start; offset < size;
         va += PAGE_SIZE, offset += PAGE_SIZE) {

        vm_object_lock(cpm_obj);
        m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
        assert(m != VM_PAGE_NULL);

        vm_page_zero_fill(m);

        type_of_fault = DBG_ZERO_FILL_FAULT;

        vm_fault_enter(m, pmap, va, VM_PROT_ALL,
                       VM_PAGE_WIRED(m), FALSE, FALSE,

        vm_object_unlock(cpm_obj);

    /*
     * Verify ordering in address space.
     */
    for (offset = 0; offset < size; offset += PAGE_SIZE) {
        vm_object_lock(cpm_obj);
        m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
        vm_object_unlock(cpm_obj);
        if (m == VM_PAGE_NULL)
            panic("vm_allocate_cpm:  obj 0x%x off 0x%x no page",

        assert(!m->fictitious);
        assert(!m->private);
        assert(!m->cleaning);
        assert(!m->precious);
        assert(!m->clustered);
        if (m->phys_page != prev_addr + 1) {
            printf("start 0x%x end 0x%x va 0x%x\n",
            printf("obj 0x%x off 0x%x\n", cpm_obj, offset);
            printf("m 0x%x prev_address 0x%x\n", m,
            panic("vm_allocate_cpm:  pages not contig!");
        prev_addr = m->phys_page;
#endif  /* MACH_ASSERT */

    vm_object_deallocate(cpm_obj); /* kill extra ref */

/*
 *  Interface is defined in all cases, but unless the kernel
 *  is built explicitly for this option, the interface does
 */
kern_return_t
vm_allocate_cpm(
    __unused vm_map_t        map,
    __unused vm_map_offset_t *addr,
    __unused vm_map_size_t   size,
    return KERN_FAILURE;
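
/*
 * Added note: "nesting" lets a submap share its page tables with the parent
 * maps that map it (pmap_nest()); vm_map_clip_unnest() below undoes that
 * sharing for a sub-range before the entry is clipped, so the parent map
 * ends up with private page tables for the affected range.
 */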
/* Not used without nested pmaps */
#ifndef NO_NESTED_PMAP
/*
 *  Clip and unnest a portion of a nested submap mapping.
 */
static void
vm_map_clip_unnest(
    vm_map_entry_t  entry,
    vm_map_offset_t start_unnest,
    vm_map_offset_t end_unnest)
    vm_map_offset_t old_start_unnest = start_unnest;
    vm_map_offset_t old_end_unnest = end_unnest;

    assert(entry->is_sub_map);
    assert(entry->object.sub_map != NULL);

    /*
     * Query the platform for the optimal unnest range.
     * DRK: There's some duplication of effort here, since
     * callers may have adjusted the range to some extent.  This
     * routine was introduced to support 1GiB subtree nesting
     * for x86 platforms, which can also nest on 2MiB boundaries
     * depending on size/alignment.
     */
    if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
        log_unnest_badness(map, old_start_unnest, old_end_unnest);

    if (entry->vme_start > start_unnest ||
        entry->vme_end < end_unnest) {
        panic("vm_map_clip_unnest(0x%llx,0x%llx): "
              "bad nested entry: start=0x%llx end=0x%llx\n",
              (long long)start_unnest, (long long)end_unnest,
              (long long)entry->vme_start, (long long)entry->vme_end);

    if (start_unnest > entry->vme_start) {
        _vm_map_clip_start(&map->hdr,
        UPDATE_FIRST_FREE(map, map->first_free);
    if (entry->vme_end > end_unnest) {
        _vm_map_clip_end(&map->hdr,
        UPDATE_FIRST_FREE(map, map->first_free);

    pmap_unnest(map->pmap,
                entry->vme_end - entry->vme_start);
    if ((map->mapped) && (map->ref_count)) {
        /* clean up parent map/maps */
        vm_map_submap_pmap_clean(
            map, entry->vme_start,
            entry->object.sub_map,
    entry->use_pmap = FALSE;
#endif  /* NO_NESTED_PMAP */
/*
 *  vm_map_clip_start:  [ internal use only ]
 *
 *  Asserts that the given entry begins at or after
 *  the specified address; if necessary,
 *  it splits the entry into two.
 */
void
vm_map_clip_start(
    vm_map_entry_t  entry,
    vm_map_offset_t startaddr)
#ifndef NO_NESTED_PMAP
    if (entry->use_pmap &&
        startaddr >= entry->vme_start) {
        vm_map_offset_t start_unnest, end_unnest;

        /*
         * Make sure "startaddr" is no longer in a nested range
         * before we clip.  Unnest only the minimum range the platform
         * vm_map_clip_unnest may perform additional adjustments to
         */
        start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
        end_unnest = start_unnest + pmap_nesting_size_min;
        vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
#endif /* NO_NESTED_PMAP */
    if (startaddr > entry->vme_start) {
        if (entry->object.vm_object &&
            !entry->is_sub_map &&
            entry->object.vm_object->phys_contiguous) {
            pmap_remove(map->pmap,
                        (addr64_t)(entry->vme_start),
                        (addr64_t)(entry->vme_end));
        _vm_map_clip_start(&map->hdr, entry, startaddr);
        UPDATE_FIRST_FREE(map, map->first_free);

#define vm_map_copy_clip_start(copy, entry, startaddr) \
    if ((startaddr) > (entry)->vme_start) \
        _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \

/*
 *  This routine is called only when it is known that
 *  the entry must be split.
 */
static void
_vm_map_clip_start(
    register struct vm_map_header   *map_header,
    register vm_map_entry_t         entry,
    register vm_map_offset_t        start)
    register vm_map_entry_t new_entry;

    /*
     *  Split off the front portion --
     *  note that we must insert the new
     *  entry BEFORE this one, so that
     *  this entry has the specified starting
     */
    new_entry = _vm_map_entry_create(map_header);
    vm_map_entry_copy_full(new_entry, entry);

    new_entry->vme_end = start;
    entry->offset += (start - entry->vme_start);
    entry->vme_start = start;

    _vm_map_entry_link(map_header, entry->vme_prev, new_entry);

    if (entry->is_sub_map)
        vm_map_reference(new_entry->object.sub_map);
        vm_object_reference(new_entry->object.vm_object);
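
/*
 * Added illustration: clipping splits one map entry at an address A.
 *
 *   before:   [ vme_start ............................ vme_end )
 *   after:    [ vme_start ...... A ) [ A ............. vme_end )
 *
 * _vm_map_clip_start() keeps the caller's entry as the right-hand piece
 * (its vme_start moves up to A); _vm_map_clip_end(), defined below, keeps
 * it as the left-hand piece instead.
 */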
/*
 *  vm_map_clip_end:    [ internal use only ]
 *
 *  Asserts that the given entry ends at or before
 *  the specified address; if necessary,
 *  it splits the entry into two.
 */
void
vm_map_clip_end(
    vm_map_entry_t  entry,
    vm_map_offset_t endaddr)
    if (endaddr > entry->vme_end) {
        /*
         * Within the scope of this clipping, limit "endaddr" to
         * the end of this map entry...
         */
        endaddr = entry->vme_end;
#ifndef NO_NESTED_PMAP
    if (entry->use_pmap) {
        vm_map_offset_t start_unnest, end_unnest;

        /*
         * Make sure the range between the start of this entry and
         * the new "endaddr" is no longer nested before we clip.
         * Unnest only the minimum range the platform can handle.
         * vm_map_clip_unnest may perform additional adjustments to
         */
        start_unnest = entry->vme_start;
            (endaddr + pmap_nesting_size_min - 1) &
            ~(pmap_nesting_size_min - 1);
        vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
#endif /* NO_NESTED_PMAP */
    if (endaddr < entry->vme_end) {
        if (entry->object.vm_object &&
            !entry->is_sub_map &&
            entry->object.vm_object->phys_contiguous) {
            pmap_remove(map->pmap,
                        (addr64_t)(entry->vme_start),
                        (addr64_t)(entry->vme_end));
        _vm_map_clip_end(&map->hdr, entry, endaddr);
        UPDATE_FIRST_FREE(map, map->first_free);

#define vm_map_copy_clip_end(copy, entry, endaddr) \
    if ((endaddr) < (entry)->vme_end) \
        _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \

/*
 *  This routine is called only when it is known that
 *  the entry must be split.
 */
static void
_vm_map_clip_end(
    register struct vm_map_header   *map_header,
    register vm_map_entry_t         entry,
    register vm_map_offset_t        end)
    register vm_map_entry_t new_entry;

    /*
     *  Create a new entry and insert it
     *  AFTER the specified entry
     */
    new_entry = _vm_map_entry_create(map_header);
    vm_map_entry_copy_full(new_entry, entry);

    new_entry->vme_start = entry->vme_end = end;
    new_entry->offset += (end - entry->vme_start);

    _vm_map_entry_link(map_header, entry, new_entry);

    if (entry->is_sub_map)
        vm_map_reference(new_entry->object.sub_map);
        vm_object_reference(new_entry->object.vm_object);
/*
 *  VM_MAP_RANGE_CHECK: [ internal use only ]
 *
 *  Asserts that the starting and ending region
 *  addresses fall within the valid range of the map.
 */
#define VM_MAP_RANGE_CHECK(map, start, end) \
    if (start < vm_map_min(map))            \
        start = vm_map_min(map);            \
    if (end > vm_map_max(map))              \
        end = vm_map_max(map);              \

/*
 *  vm_map_range_check: [ internal use only ]
 *
 *  Check that the region defined by the specified start and
 *  end addresses is wholly contained within a single map
 *  entry or set of adjacent map entries of the specified map,
 *  i.e. the specified region contains no unmapped space.
 *  If any or all of the region is unmapped, FALSE is returned.
 *  Otherwise, TRUE is returned and if the output argument 'entry'
 *  is not NULL it points to the map entry containing the start
 *
 *  The map is locked for reading on entry and is left locked.
 */
static boolean_t
vm_map_range_check(
    register vm_map_t        map,
    register vm_map_offset_t start,
    register vm_map_offset_t end,
    vm_map_entry_t           *entry)
    register vm_map_offset_t prev;

    /*
     *  Basic sanity checks first
     */
    if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)

    /*
     *  Check first if the region starts within a valid
     *  mapping for the map.
     */
    if (!vm_map_lookup_entry(map, start, &cur))

    /*
     *  Optimize for the case that the region is contained
     *  in a single map entry.
     */
    if (entry != (vm_map_entry_t *) NULL)
    if (end <= cur->vme_end)

    /*
     *  If the region is not wholly contained within a
     *  single entry, walk the entries looking for holes.
     */
    prev = cur->vme_end;
    cur = cur->vme_next;
    while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
        if (end <= cur->vme_end)
        prev = cur->vme_end;
        cur = cur->vme_next;
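
/*
 * Added note: VM_MAP_RANGE_CHECK() silently clamps [start, end) to the
 * map's valid range, while vm_map_range_check() answers whether the range
 * is fully backed by adjacent entries; callers such as the wire/unwire
 * paths below use the former before walking entries.
 */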
/*
 *  vm_map_submap:      [ kernel use only ]
 *
 *  Mark the given range as handled by a subordinate map.
 *
 *  This range must have been created with vm_map_find using
 *  the vm_submap_object, and no other operations may have been
 *  performed on this range prior to calling vm_map_submap.
 *
 *  Only a limited number of operations can be performed
 *  within this range after calling vm_map_submap:
 *      [Don't try vm_map_copyin!]
 *
 *  To remove a submapping, one must first remove the
 *  range from the superior map, and then destroy the
 *  submap (if desired).  [Better yet, don't try it.]
 */
kern_return_t
vm_map_submap(
    vm_map_offset_t start,
    vm_map_offset_t end,
    vm_map_offset_t offset,
#ifdef NO_NESTED_PMAP
#endif  /* NO_NESTED_PMAP */
    vm_map_entry_t          entry;
    register kern_return_t  result = KERN_INVALID_ARGUMENT;
    register vm_object_t    object;

    if (! vm_map_lookup_entry(map, start, &entry)) {
        entry = entry->vme_next;

    if (entry == vm_map_to_entry(map) ||
        entry->is_sub_map) {
        return KERN_INVALID_ARGUMENT;

    assert(!entry->use_pmap); /* we don't want to unnest anything here */
    vm_map_clip_start(map, entry, start);
    vm_map_clip_end(map, entry, end);

    if ((entry->vme_start == start) && (entry->vme_end == end) &&
        (!entry->is_sub_map) &&
        ((object = entry->object.vm_object) == vm_submap_object) &&
        (object->resident_page_count == 0) &&
        (object->copy == VM_OBJECT_NULL) &&
        (object->shadow == VM_OBJECT_NULL) &&
        (!object->pager_created)) {
        entry->offset = (vm_object_offset_t)offset;
        entry->object.vm_object = VM_OBJECT_NULL;
        vm_object_deallocate(object);
        entry->is_sub_map = TRUE;
        entry->object.sub_map = submap;
        vm_map_reference(submap);
        submap->mapped = TRUE;

#ifndef NO_NESTED_PMAP
        /* nest if platform code will allow */
        if(submap->pmap == NULL) {
            submap->pmap = pmap_create((vm_map_size_t) 0, FALSE);
            if(submap->pmap == PMAP_NULL) {
                return(KERN_NO_SPACE);
        result = pmap_nest(map->pmap,
                           (entry->object.sub_map)->pmap,
                           (uint64_t)(end - start));
            panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
        entry->use_pmap = TRUE;
#else   /* NO_NESTED_PMAP */
        pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
#endif  /* NO_NESTED_PMAP */
        result = KERN_SUCCESS;
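
/*
 * Added note: submaps are a kernel-only facility; in practice they back
 * things like the shared region and the commpage, where one pre-built map
 * is exposed inside many task maps, optionally with nested page tables
 * (entry->use_pmap) when pmap_nest() is available on the platform.
 */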
/*
 *  Sets the protection of the specified address
 *  region in the target map.  If "set_max" is
 *  specified, the maximum protection is to be set;
 *  otherwise, only the current protection is affected.
 */
kern_return_t
vm_map_protect(
    register vm_map_t        map,
    register vm_map_offset_t start,
    register vm_map_offset_t end,
    register vm_prot_t       new_prot,
    register boolean_t       set_max)
    register vm_map_entry_t  current;
    register vm_map_offset_t prev;
    vm_map_entry_t           entry;

        "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
        map, start, end, new_prot, set_max);

    /* LP64todo - remove this check when vm_map_commpage64()
     * no longer has to stuff in a map_entry for the commpage
     * above the map's max_offset.
     */
    if (start >= map->max_offset) {
        return(KERN_INVALID_ADDRESS);

    /*
     *  Lookup the entry.  If it doesn't start in a valid
     *  entry, return an error.
     */
    if (! vm_map_lookup_entry(map, start, &entry)) {
        return(KERN_INVALID_ADDRESS);

    if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
        start = SUPERPAGE_ROUND_DOWN(start);

    if (entry->superpage_size)
        end = SUPERPAGE_ROUND_UP(end);

    /*
     *  Make a first pass to check for protection and address
     */
    prev = current->vme_start;
    while ((current != vm_map_to_entry(map)) &&
           (current->vme_start < end)) {

        /*
         * If there is a hole, return an error.
         */
        if (current->vme_start != prev) {
            return(KERN_INVALID_ADDRESS);

        new_max = current->max_protection;
        if(new_prot & VM_PROT_COPY) {
            new_max |= VM_PROT_WRITE;
            if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
                return(KERN_PROTECTION_FAILURE);
            if ((new_prot & new_max) != new_prot) {
                return(KERN_PROTECTION_FAILURE);

        if (new_prot & VM_PROT_WRITE) {
            if (new_prot & VM_PROT_EXECUTE) {
                printf("EMBEDDED: %s can't have both write and exec at the same time\n", __FUNCTION__);
                new_prot &= ~VM_PROT_EXECUTE;

        prev = current->vme_end;
        current = current->vme_next;

        return(KERN_INVALID_ADDRESS);

    /*
     *  Go back and fix up protections.
     *  Clip to start here if the range starts within
     */
    if (current != vm_map_to_entry(map)) {
        /* clip and unnest if necessary */
        vm_map_clip_start(map, current, start);

    while ((current != vm_map_to_entry(map)) &&
           (current->vme_start < end)) {

        vm_map_clip_end(map, current, end);

        assert(!current->use_pmap); /* clipping did unnest if needed */

        old_prot = current->protection;

        if(new_prot & VM_PROT_COPY) {
            /* caller is asking specifically to copy the */
            /* mapped data, this implies that max protection */
            /* will include write.  Caller must be prepared */
            /* for loss of shared memory communication in the */
            /* target area after taking this step */
            current->needs_copy = TRUE;
            current->max_protection |= VM_PROT_WRITE;

            current->protection =
                (current->max_protection =
                 new_prot & ~VM_PROT_COPY) &
            current->protection = new_prot & ~VM_PROT_COPY;

        /*
         *  Update physical map if necessary.
         *  If the request is to turn off write protection,
         *  we won't do it for real (in pmap). This is because
         *  it would cause copy-on-write to fail.  We've already
         *  set the new protection in the map, so if a
         *  write-protect fault occurred, it will be fixed up
         *  properly, COW or not.
         */
        if (current->protection != old_prot) {
            /* Look one level in; we support nested pmaps */
            /* from mapped submaps which are direct entries */

            prot = current->protection & ~VM_PROT_WRITE;

            if (override_nx(map, current->alias) && prot)
                prot |= VM_PROT_EXECUTE;

            if (current->is_sub_map && current->use_pmap) {
                pmap_protect(current->object.sub_map->pmap,
                pmap_protect(map->pmap,

        current = current->vme_next;

    while ((current != vm_map_to_entry(map)) &&
           (current->vme_start <= end)) {
        vm_map_simplify_entry(map, current);
        current = current->vme_next;

    return(KERN_SUCCESS);
/*
 *  Sets the inheritance of the specified address
 *  range in the target map.  Inheritance
 *  affects how the map will be shared with
 *  child maps at the time of vm_map_fork.
 */
kern_return_t
vm_map_inherit(
    register vm_map_t        map,
    register vm_map_offset_t start,
    register vm_map_offset_t end,
    register vm_inherit_t    new_inheritance)
    register vm_map_entry_t  entry;
    vm_map_entry_t           temp_entry;

    VM_MAP_RANGE_CHECK(map, start, end);

    if (vm_map_lookup_entry(map, start, &temp_entry)) {
        temp_entry = temp_entry->vme_next;

    /* first check entire range for submaps which can't support the */
    /* given inheritance. */
    while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
        if(entry->is_sub_map) {
            if(new_inheritance == VM_INHERIT_COPY) {
                return(KERN_INVALID_ARGUMENT);

        entry = entry->vme_next;

    if (entry != vm_map_to_entry(map)) {
        /* clip and unnest if necessary */
        vm_map_clip_start(map, entry, start);

    while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
        vm_map_clip_end(map, entry, end);
        assert(!entry->use_pmap); /* clip did unnest if needed */

        entry->inheritance = new_inheritance;

        entry = entry->vme_next;

    return(KERN_SUCCESS);
/*
 * Update the accounting for the amount of wired memory in this map.  If the user has
 * exceeded the defined limits, then we fail.  Wiring on behalf of the kernel never fails.
 */
static kern_return_t
add_wire_counts(
    vm_map_entry_t  entry,
    boolean_t       user_wire)

        /*
         * We're wiring memory at the request of the user.  Check if this is the first time the user is wiring
         */
        if (entry->user_wired_count == 0) {
            size = entry->vme_end - entry->vme_start;

            /*
             * Since this is the first time the user is wiring this map entry, check to see if we're
             * exceeding the user wire limits.  There is a per map limit which is the smaller of either
             * the process's rlimit or the global vm_user_wire_limit which caps this value.  There is also
             * a system-wide limit on the amount of memory all users can wire.  If the user is over either
             * limit, then we fail.
             */
            if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
               size + ptoa_64(vm_page_wire_count) > vm_global_user_wire_limit ||
               size + ptoa_64(vm_page_wire_count) > max_mem - vm_global_no_user_wire_amount)
                return KERN_RESOURCE_SHORTAGE;

            /*
             * The first time the user wires an entry, we also increment the wired_count and add this to
             * the total that has been wired in the map.
             */
            if (entry->wired_count >= MAX_WIRE_COUNT)
                return KERN_FAILURE;

            entry->wired_count++;
            map->user_wire_size += size;

        if (entry->user_wired_count >= MAX_WIRE_COUNT)
            return KERN_FAILURE;

        entry->user_wired_count++;

        /*
         * The kernel's wiring the memory.  Just bump the count and continue.
         */
        if (entry->wired_count >= MAX_WIRE_COUNT)
            panic("vm_map_wire: too many wirings");

        entry->wired_count++;

    return KERN_SUCCESS;
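
/*
 * Added note: for a user wire request the checks above amount to
 *
 *   size + map->user_wire_size         <= MIN(map->user_wire_limit,
 *                                             vm_user_wire_limit)
 *   size + ptoa_64(vm_page_wire_count) <= vm_global_user_wire_limit
 *   size + ptoa_64(vm_page_wire_count) <= max_mem -
 *                                         vm_global_no_user_wire_amount
 *
 * i.e. a per-map cap, a global user cap, and a reserve of memory that can
 * never be user-wired; kernel wirings skip all three checks.
 */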
/*
 * Update the memory wiring accounting now that the given map entry is being unwired.
 */
static void
subtract_wire_counts(
    vm_map_entry_t  entry,
    boolean_t       user_wire)

        /*
         * We're unwiring memory at the request of the user.  See if we're removing the last user wire reference.
         */
        if (entry->user_wired_count == 1) {

            /*
             * We're removing the last user wire reference.  Decrement the wired_count and the total
             * user wired memory for this map.
             */
            assert(entry->wired_count >= 1);
            entry->wired_count--;
            map->user_wire_size -= entry->vme_end - entry->vme_start;

        assert(entry->user_wired_count >= 1);
        entry->user_wired_count--;

        /*
         * The kernel is unwiring the memory.  Just update the count.
         */
        assert(entry->wired_count >= 1);
        entry->wired_count--;
/*
 *  Sets the pageability of the specified address range in the
 *  target map as wired.  Regions specified as not pageable require
 *  locked-down physical memory and physical page maps.  The
 *  access_type variable indicates types of accesses that must not
 *  generate page faults.  This is checked against protection of
 *  memory being locked-down.
 *
 *  The map must not be locked, but a reference must remain to the
 *  map throughout the call.
 */
static kern_return_t
vm_map_wire_nested(
    register vm_map_t        map,
    register vm_map_offset_t start,
    register vm_map_offset_t end,
    register vm_prot_t       access_type,
    boolean_t                user_wire,
    vm_map_offset_t          pmap_addr)
    register vm_map_entry_t  entry;
    struct vm_map_entry      *first_entry, tmp_entry;
    register vm_map_offset_t s,e;
    boolean_t                need_wakeup;
    boolean_t                main_map = FALSE;
    wait_interrupt_t         interruptible_state;
    thread_t                 cur_thread;
    unsigned int             last_timestamp;

    if(map_pmap == NULL)
    last_timestamp = map->timestamp;

    VM_MAP_RANGE_CHECK(map, start, end);
    assert(page_aligned(start));
    assert(page_aligned(end));
        /* We wired what the caller asked for, zero pages */
        return KERN_SUCCESS;

    need_wakeup = FALSE;
    cur_thread = current_thread();

    if (vm_map_lookup_entry(map, s, &first_entry)) {
        entry = first_entry;
        /*
         * vm_map_clip_start will be done later.
         * We don't want to unnest any nested submaps here !
         */
        /* Start address is not in map */
        rc = KERN_INVALID_ADDRESS;

    while ((entry != vm_map_to_entry(map)) && (s < end)) {
        /*
         * At this point, we have wired from "start" to "s".
         * We still need to wire from "s" to "end".
         *
         * "entry" hasn't been clipped, so it could start before "s"
         * and/or end after "end".
         */

        /* "e" is how far we want to wire in this entry */

        /*
         * If another thread is wiring/unwiring this entry then
         * block after informing other thread to wake us up.
         */
        if (entry->in_transition) {
            wait_result_t wait_result;

            /*
             * We have not clipped the entry.  Make sure that
             * the start address is in range so that the lookup
             * below will succeed.
             * "s" is the current starting point: we've already
             * wired from "start" to "s" and we still have
             * to wire from "s" to "end".
             */

            entry->needs_wakeup = TRUE;

            /*
             * wake up anybody waiting on entries that we have
             */
                vm_map_entry_wakeup(map);
                need_wakeup = FALSE;
            /*
             * User wiring is interruptible
             */
            wait_result = vm_map_entry_wait(map,
                                            (user_wire) ? THREAD_ABORTSAFE :
            if (user_wire && wait_result == THREAD_INTERRUPTED) {
                /*
                 * undo the wirings we have done so far
                 * We do not clear the needs_wakeup flag,
                 * because we cannot tell if we were the
                 */

            /*
             * Cannot avoid a lookup here. reset timestamp.
             */
            last_timestamp = map->timestamp;

            /*
             * The entry could have been clipped, look it up again.
             * Worst that can happen is, it may not exist anymore.
             */
            if (!vm_map_lookup_entry(map, s, &first_entry)) {
                panic("vm_map_wire: re-lookup failed");

                /*
                 * User: undo everything up to the previous
                 * entry.  let vm_map_unwire worry about
                 * checking the validity of the range.
                 */
            entry = first_entry;

        if (entry->is_sub_map) {
            vm_map_offset_t sub_start;
            vm_map_offset_t sub_end;
            vm_map_offset_t local_start;
            vm_map_offset_t local_end;

            vm_map_clip_start(map, entry, s);
            vm_map_clip_end(map, entry, end);

            sub_start = entry->offset;
            sub_end = entry->vme_end;
            sub_end += entry->offset - entry->vme_start;

            local_end = entry->vme_end;
            if(map_pmap == NULL) {
                vm_object_offset_t offset;
                vm_map_entry_t     local_entry;
                vm_map_version_t   version;
                vm_map_t           lookup_map;

                if(entry->use_pmap) {
                    pmap = entry->object.sub_map->pmap;
                    /* ppc implementation requires that */
                    /* submaps pmap address ranges line */
                    /* up with parent map */
                    pmap_addr = sub_start;

                if (entry->wired_count) {
                    if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)

                    /*
                     * The map was not unlocked:
                     * no need to goto re-lookup.
                     * Just go directly to next entry.
                     */
                    entry = entry->vme_next;
                    s = entry->vme_start;

                /* call vm_map_lookup_locked to */
                /* cause any needs copy to be */
                local_start = entry->vme_start;
                vm_map_lock_write_to_read(map);
                if(vm_map_lookup_locked(
                       &lookup_map, local_start,
                       OBJECT_LOCK_EXCLUSIVE,
                       &offset, &prot, &wired,
                    vm_map_unlock_read(lookup_map);
                    vm_map_unwire(map, start,
                    return(KERN_FAILURE);
                if(real_map != lookup_map)
                    vm_map_unlock(real_map);
                vm_map_unlock_read(lookup_map);
                vm_object_unlock(object);

                /* we unlocked, so must re-lookup */
                if (!vm_map_lookup_entry(map,

                /*
                 * entry could have been "simplified",
                 */
                entry = local_entry;
                assert(s == local_start);
                vm_map_clip_start(map, entry, s);
                vm_map_clip_end(map, entry, end);
                /* re-compute "e" */

                /* did we have a change of type? */
                if (!entry->is_sub_map) {
                    last_timestamp = map->timestamp;

            local_start = entry->vme_start;

            if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)

            entry->in_transition = TRUE;

            rc = vm_map_wire_nested(entry->object.sub_map,
                                    user_wire, pmap, pmap_addr);

            /*
             * Find the entry again.  It could have been clipped
             * after we unlocked the map.
             */
            if (!vm_map_lookup_entry(map, local_start,
                panic("vm_map_wire: re-lookup failed");
            entry = first_entry;

            assert(local_start == s);
            /* re-compute "e" */

            last_timestamp = map->timestamp;
            while ((entry != vm_map_to_entry(map)) &&
                   (entry->vme_start < e)) {
                assert(entry->in_transition);
                entry->in_transition = FALSE;
                if (entry->needs_wakeup) {
                    entry->needs_wakeup = FALSE;
                if (rc != KERN_SUCCESS) {/* from vm_*_wire */
                    subtract_wire_counts(map, entry, user_wire);
                entry = entry->vme_next;
            if (rc != KERN_SUCCESS) { /* from vm_*_wire */

            /* no need to relookup again */
            s = entry->vme_start;

        /*
         * If this entry is already wired then increment
         * the appropriate wire reference count.
         */
        if (entry->wired_count) {
            /*
             * entry is already wired down, get our reference
             * after clipping to our range.
             */
            vm_map_clip_start(map, entry, s);
            vm_map_clip_end(map, entry, end);

            if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)

            /* map was not unlocked: no need to relookup */
            entry = entry->vme_next;
            s = entry->vme_start;

        /*
         * Unwired entry or wire request transmitted via submap
         */

        /*
         * Perform actions of vm_map_lookup that need the write
         * lock on the map: create a shadow object for a
         * copy-on-write region, or an object for a zero-fill
         */
        size = entry->vme_end - entry->vme_start;
        /*
         * If wiring a copy-on-write page, we need to copy it now
         * even if we're only (currently) requesting read access.
         * This is aggressive, but once it's wired we can't move it.
         */
        if (entry->needs_copy) {
            vm_object_shadow(&entry->object.vm_object,
                             &entry->offset, size);
            entry->needs_copy = FALSE;
        } else if (entry->object.vm_object == VM_OBJECT_NULL) {
            entry->object.vm_object = vm_object_allocate(size);
            entry->offset = (vm_object_offset_t)0;

        vm_map_clip_start(map, entry, s);
        vm_map_clip_end(map, entry, end);

        /* re-compute "e" */

        /*
         * Check for holes and protection mismatch.
         * Holes: Next entry should be contiguous unless this
         *        is the end of the region.
         * Protection: Access requested must be allowed, unless
         *        wiring is by protection class
         */
        if ((entry->vme_end < end) &&
            ((entry->vme_next == vm_map_to_entry(map)) ||
             (entry->vme_next->vme_start > entry->vme_end))) {
            rc = KERN_INVALID_ADDRESS;
        if ((entry->protection & access_type) != access_type) {
            /* found a protection problem */
            rc = KERN_PROTECTION_FAILURE;

        assert(entry->wired_count == 0 && entry->user_wired_count == 0);

        if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)

        entry->in_transition = TRUE;

        /*
         * This entry might get split once we unlock the map.
         * In vm_fault_wire(), we need the current range as
         * defined by this entry.  In order for this to work
         * along with a simultaneous clip operation, we make a
         * temporary copy of this entry and use that for the
         * wiring.  Note that the underlying objects do not
         * change during a clip.
         */

        /*
         * The in_transition state guarantees that the entry
         * (or entries for this range, if a split occurred) will be
         * there when the map lock is acquired for the second time.
         */

        if (!user_wire && cur_thread != THREAD_NULL)
            interruptible_state = thread_interrupt_level(THREAD_UNINT);
            interruptible_state = THREAD_UNINT;

            rc = vm_fault_wire(map,
                               &tmp_entry, map_pmap, pmap_addr);
            rc = vm_fault_wire(map,
                               &tmp_entry, map->pmap,
                               tmp_entry.vme_start);

        if (!user_wire && cur_thread != THREAD_NULL)
            thread_interrupt_level(interruptible_state);

        if (last_timestamp+1 != map->timestamp) {
            /*
             * Find the entry again.  It could have been clipped
             * after we unlocked the map.
             */
            if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
                panic("vm_map_wire: re-lookup failed");

            entry = first_entry;

        last_timestamp = map->timestamp;

        while ((entry != vm_map_to_entry(map)) &&
               (entry->vme_start < tmp_entry.vme_end)) {
            assert(entry->in_transition);
            entry->in_transition = FALSE;
            if (entry->needs_wakeup) {
                entry->needs_wakeup = FALSE;
            if (rc != KERN_SUCCESS) { /* from vm_*_wire */
                subtract_wire_counts(map, entry, user_wire);
            entry = entry->vme_next;

        if (rc != KERN_SUCCESS) { /* from vm_*_wire */

        s = entry->vme_start;
    } /* end while loop through map entries */

    if (rc == KERN_SUCCESS) {
        /* repair any damage we may have made to the VM map */
        vm_map_simplify_range(map, start, end);

    /*
     * wake up anybody waiting on entries we wired.
     */
        vm_map_entry_wakeup(map);

    if (rc != KERN_SUCCESS) {
        /* undo what has been wired so far */
        vm_map_unwire(map, start, s, user_wire);
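
/*
 * Added note: vm_map_wire() is the exported wrapper; it range-checks the
 * request, pre-reserves pmap mappings where the platform requires it
 * (mapping_prealloc), and then calls vm_map_wire_nested() with no
 * alternate pmap, so the wiring lands in the map's own pmap.
 */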
kern_return_t
vm_map_wire(
    register vm_map_t        map,
    register vm_map_offset_t start,
    register vm_map_offset_t end,
    register vm_prot_t       access_type,
    boolean_t                user_wire)

    /*
     * the calls to mapping_prealloc and mapping_relpre
     * (along with the VM_MAP_RANGE_CHECK to ensure a
     * reasonable range was passed in) are
     * currently necessary because
     * we haven't enabled kernel pre-emption
     * and/or the pmap_enter cannot purge and re-use
     */
    VM_MAP_RANGE_CHECK(map, start, end);
    assert((unsigned int) (end - start) == (end - start));
    mapping_prealloc((unsigned int) (end - start));
    kret = vm_map_wire_nested(map, start, end, access_type,
                              user_wire, (pmap_t)NULL, 0);
/*
 *  Sets the pageability of the specified address range in the target
 *  as pageable.  Regions specified must have been wired previously.
 *
 *  The map must not be locked, but a reference must remain to the map
 *  throughout the call.
 *
 *  Kernel will panic on failures.  User unwire ignores holes and
 *  unwired and in-transition entries to avoid losing memory by leaving
 */
static kern_return_t
vm_map_unwire_nested(
    register vm_map_t        map,
    register vm_map_offset_t start,
    register vm_map_offset_t end,
    boolean_t                user_wire,
    vm_map_offset_t          pmap_addr)
    register vm_map_entry_t  entry;
    struct vm_map_entry      *first_entry, tmp_entry;
    boolean_t                need_wakeup;
    boolean_t                main_map = FALSE;
    unsigned int             last_timestamp;

    if(map_pmap == NULL)
    last_timestamp = map->timestamp;

    VM_MAP_RANGE_CHECK(map, start, end);
    assert(page_aligned(start));
    assert(page_aligned(end));

        /* We unwired what the caller asked for: zero pages */
        return KERN_SUCCESS;

    if (vm_map_lookup_entry(map, start, &first_entry)) {
        entry = first_entry;
        /*
         * vm_map_clip_start will be done later.
         * We don't want to unnest any nested sub maps here !
         */
            panic("vm_map_unwire: start not found");
        /* Start address is not in map. */
        return(KERN_INVALID_ADDRESS);

    if (entry->superpage_size) {
        /* superpages are always wired */
        return KERN_INVALID_ADDRESS;

    need_wakeup = FALSE;
    while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
        if (entry->in_transition) {
            /*
             * 1)
             * Another thread is wiring down this entry. Note
             * that if it is not for the other thread we would
             * be unwiring an unwired entry.  This is not
             * permitted.  If we wait, we will be unwiring memory
             *
             * 2)
             * Another thread is unwiring this entry.  We did not
             * have a reference to it, because if we did, this
             * entry will not be getting unwired now.
             */
                /*
                 * This could happen: there could be some
                 * overlapping vslock/vsunlock operations
                 *
                 * We should probably just wait and retry,
                 * but then we have to be careful that this
                 * entry could get "simplified" after
                 * "in_transition" gets unset and before
                 * we re-lookup the entry, so we would
                 * have to re-clip the entry to avoid
                 * re-unwiring what we have already unwired...
                 * See vm_map_wire_nested().
                 *
                 * Or we could just ignore "in_transition"
                 * here and proceed to decrement the wired
                 * count(s) on this entry.  That should be fine
                 * as long as "wired_count" doesn't drop all
                 * the way to 0 (and we should panic if THAT
                 */
                panic("vm_map_unwire: in_transition entry");

            entry = entry->vme_next;

        if (entry->is_sub_map) {
            vm_map_offset_t sub_start;
            vm_map_offset_t sub_end;
            vm_map_offset_t local_end;

            vm_map_clip_start(map, entry, start);
            vm_map_clip_end(map, entry, end);

            sub_start = entry->offset;
            sub_end = entry->vme_end - entry->vme_start;
            sub_end += entry->offset;
            local_end = entry->vme_end;
            if(map_pmap == NULL) {
                if(entry->use_pmap) {
                    pmap = entry->object.sub_map->pmap;
                    pmap_addr = sub_start;

                if (entry->wired_count == 0 ||
                    (user_wire && entry->user_wired_count == 0)) {
                        panic("vm_map_unwire: entry is unwired");
                    entry = entry->vme_next;

                /*
                 * Holes: Next entry should be contiguous unless
                 * this is the end of the region.
                 */
                if (((entry->vme_end < end) &&
                     ((entry->vme_next == vm_map_to_entry(map)) ||
                      (entry->vme_next->vme_start
                       > entry->vme_end)))) {
                        panic("vm_map_unwire: non-contiguous region");
                    entry = entry->vme_next;

                    subtract_wire_counts(map, entry, user_wire);

                if (entry->wired_count != 0) {
                    entry = entry->vme_next;

                entry->in_transition = TRUE;
                tmp_entry = *entry;/* see comment in vm_map_wire() */

                /*
                 * We can unlock the map now. The in_transition state
                 * guarantees existence of the entry.
                 */
                vm_map_unwire_nested(entry->object.sub_map,
                                     sub_start, sub_end, user_wire, pmap, pmap_addr);

                if (last_timestamp+1 != map->timestamp) {
                    /*
                     * Find the entry again.  It could have been
                     * clipped or deleted after we unlocked the map.
                     */
                    if (!vm_map_lookup_entry(map,
                                             tmp_entry.vme_start,
                        panic("vm_map_unwire: re-lookup failed");
                    entry = first_entry->vme_next;
                    entry = first_entry;
                last_timestamp = map->timestamp;

                /*
                 * clear transition bit for all constituent entries
                 * that were in the original entry (saved in
                 * tmp_entry).  Also check for waiters.
                 */
                while ((entry != vm_map_to_entry(map)) &&
                       (entry->vme_start < tmp_entry.vme_end)) {
                    assert(entry->in_transition);
                    entry->in_transition = FALSE;
                    if (entry->needs_wakeup) {
                        entry->needs_wakeup = FALSE;
                    entry = entry->vme_next;

                vm_map_unwire_nested(entry->object.sub_map,
                                     sub_start, sub_end, user_wire, map_pmap,

                if (last_timestamp+1 != map->timestamp) {
                    /*
                     * Find the entry again.  It could have been
                     * clipped or deleted after we unlocked the map.
                     */
                    if (!vm_map_lookup_entry(map,
                                             tmp_entry.vme_start,
                        panic("vm_map_unwire: re-lookup failed");
                    entry = first_entry->vme_next;
                    entry = first_entry;
                last_timestamp = map->timestamp;

        if ((entry->wired_count == 0) ||
            (user_wire && entry->user_wired_count == 0)) {
                panic("vm_map_unwire: entry is unwired");
            entry = entry->vme_next;

        assert(entry->wired_count > 0 &&
               (!user_wire || entry->user_wired_count > 0));

        vm_map_clip_start(map, entry, start);
        vm_map_clip_end(map, entry, end);

        /*
         * Holes: Next entry should be contiguous unless
         * this is the end of the region.
         */
        if (((entry->vme_end < end) &&
             ((entry->vme_next == vm_map_to_entry(map)) ||
              (entry->vme_next->vme_start > entry->vme_end)))) {
                panic("vm_map_unwire: non-contiguous region");
            entry = entry->vme_next;

            subtract_wire_counts(map, entry, user_wire);

        if (entry->wired_count != 0) {
            entry = entry->vme_next;

        if(entry->zero_wired_pages) {
            entry->zero_wired_pages = FALSE;

        entry->in_transition = TRUE;
        tmp_entry = *entry;     /* see comment in vm_map_wire() */

        /*
         * We can unlock the map now. The in_transition state
         * guarantees existence of the entry.
         */
            vm_fault_unwire(map,
                            &tmp_entry, FALSE, map_pmap, pmap_addr);
            vm_fault_unwire(map,
                            &tmp_entry, FALSE, map->pmap,
                            tmp_entry.vme_start);

        if (last_timestamp+1 != map->timestamp) {
            /*
             * Find the entry again.  It could have been clipped
             * or deleted after we unlocked the map.
             */
            if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
                panic("vm_map_unwire: re-lookup failed");
            entry = first_entry->vme_next;
            entry = first_entry;
        last_timestamp = map->timestamp;

        /*
         * clear transition bit for all constituent entries that
         * were in the original entry (saved in tmp_entry).  Also
         * check for waiters.
         */
        while ((entry != vm_map_to_entry(map)) &&
               (entry->vme_start < tmp_entry.vme_end)) {
            assert(entry->in_transition);
            entry->in_transition = FALSE;
            if (entry->needs_wakeup) {
                entry->needs_wakeup = FALSE;
            entry = entry->vme_next;

    /*
     * We might have fragmented the address space when we wired this
     * range of addresses.  Attempt to re-coalesce these VM map entries
     * with their neighbors now that they're no longer wired.
     * Under some circumstances, address space fragmentation can
     * prevent VM object shadow chain collapsing, which can cause
     */
    vm_map_simplify_range(map, start, end);

    /*
     * wake up anybody waiting on entries that we have unwired.
     */
        vm_map_entry_wakeup(map);
    return(KERN_SUCCESS);

kern_return_t
vm_map_unwire(
    register vm_map_t        map,
    register vm_map_offset_t start,
    register vm_map_offset_t end,
    boolean_t                user_wire)
    return vm_map_unwire_nested(map, start, end,
                                user_wire, (pmap_t)NULL, 0);
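
/*
 * Added note: unwiring is the mirror image of the wiring path above -- the
 * same clipping, in_transition marking and timestamp-based re-lookup dance
 * is repeated so that concurrent map operations never observe a
 * half-unwired entry.
 */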
/*
 *  vm_map_entry_delete:    [ internal use only ]
 *
 *  Deallocate the given entry from the target map.
 */
vm_map_entry_delete(
    register vm_map_t       map,
    register vm_map_entry_t entry)
    register vm_map_offset_t s, e;
    register vm_object_t     object;
    register vm_map_t        submap;

    s = entry->vme_start;
    assert(page_aligned(s));
    assert(page_aligned(e));
    assert(entry->wired_count == 0);
    assert(entry->user_wired_count == 0);
    assert(!entry->permanent);

    if (entry->is_sub_map) {
        submap = entry->object.sub_map;
        object = entry->object.vm_object;

    vm_map_entry_unlink(map, entry);

    vm_map_entry_dispose(map, entry);

    /*
     *  Deallocate the object only after removing all
     *  pmap entries pointing to its pages.
     */
        vm_map_deallocate(submap);
        vm_object_deallocate(object);
vm_map_submap_pmap_clean(
    vm_map_offset_t start,
    vm_map_offset_t end,
    vm_map_offset_t offset)
    vm_map_offset_t submap_start;
    vm_map_offset_t submap_end;
    vm_map_size_t   remove_size;
    vm_map_entry_t  entry;

    submap_end = offset + (end - start);
    submap_start = offset;

    vm_map_lock_read(sub_map);
    if(vm_map_lookup_entry(sub_map, offset, &entry)) {

        remove_size = (entry->vme_end - entry->vme_start);
        if(offset > entry->vme_start)
            remove_size -= offset - entry->vme_start;

        if(submap_end < entry->vme_end) {
                entry->vme_end - submap_end;
        if(entry->is_sub_map) {
            vm_map_submap_pmap_clean(
                start + remove_size,
                entry->object.sub_map,

            if((map->mapped) && (map->ref_count)
               && (entry->object.vm_object != NULL)) {
                vm_object_pmap_protect(
                    entry->object.vm_object,
                pmap_remove(map->pmap,
                            (addr64_t)(start + remove_size));

    entry = entry->vme_next;

    while((entry != vm_map_to_entry(sub_map))
          && (entry->vme_start < submap_end)) {
        remove_size = (entry->vme_end - entry->vme_start);
        if(submap_end < entry->vme_end) {
            remove_size -= entry->vme_end - submap_end;
        if(entry->is_sub_map) {
            vm_map_submap_pmap_clean(
                (start + entry->vme_start) - offset,
                ((start + entry->vme_start) - offset) + remove_size,
                entry->object.sub_map,
            if((map->mapped) && (map->ref_count)
               && (entry->object.vm_object != NULL)) {
                vm_object_pmap_protect(
                    entry->object.vm_object,
                pmap_remove(map->pmap,
                            (addr64_t)((start + entry->vme_start)
                            (addr64_t)(((start + entry->vme_start)
                                        - offset) + remove_size));
        entry = entry->vme_next;
    vm_map_unlock_read(sub_map);
/*
 *	vm_map_delete:	[ internal use only ]
 *
 *	Deallocates the given address range from the target map.
 *	Removes all user wirings.  Unwires one kernel wiring if
 *	VM_MAP_REMOVE_KUNWIRE is set.  Waits for kernel wirings to go
 *	away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set.  Sleeps
 *	interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
 *
 *	This routine is called with map locked and leaves map locked.
 */
static kern_return_t
vm_map_delete(
	vm_map_t		map,
	vm_map_offset_t		start,
	vm_map_offset_t		end,
	int			flags,
	vm_map_t		zap_map)
{
	vm_map_entry_t		entry, next;
	struct	 vm_map_entry	*first_entry, tmp_entry;
	register vm_map_offset_t s;
	register vm_object_t	object;
	boolean_t		need_wakeup;
	unsigned int		last_timestamp = ~0; /* unlikely value */
	int			interruptible;

	interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
		THREAD_ABORTSAFE : THREAD_UNINT;

	/*
	 * All our DMA I/O operations in IOKit are currently done by
	 * wiring through the map entries of the task requesting the I/O.
	 * Because of this, we must always wait for kernel wirings
	 * to go away on the entries before deleting them.
	 *
	 * Any caller who wants to actually remove a kernel wiring
	 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
	 * properly remove one wiring instead of blasting through
	 * them all.
	 */
	flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
4835 * Find the start of the region, and clip it
4837 if (vm_map_lookup_entry(map
, start
, &first_entry
)) {
4838 entry
= first_entry
;
4839 if (entry
->superpage_size
&& (start
& ~SUPERPAGE_MASK
)) { /* extend request to whole entry */ start
= SUPERPAGE_ROUND_DOWN(start
);
4840 start
= SUPERPAGE_ROUND_DOWN(start
);
4843 if (start
== entry
->vme_start
) {
4845 * No need to clip. We don't want to cause
4846 * any unnecessary unnesting in this case...
4849 vm_map_clip_start(map
, entry
, start
);
4853 * Fix the lookup hint now, rather than each
4854 * time through the loop.
4856 SAVE_HINT_MAP_WRITE(map
, entry
->vme_prev
);
4858 entry
= first_entry
->vme_next
;
4862 if (entry
->superpage_size
)
4863 end
= SUPERPAGE_ROUND_UP(end
);
4865 need_wakeup
= FALSE
;
4867 * Step through all entries in this region
4869 s
= entry
->vme_start
;
4870 while ((entry
!= vm_map_to_entry(map
)) && (s
< end
)) {
4872 * At this point, we have deleted all the memory entries
4873 * between "start" and "s". We still need to delete
4874 * all memory entries between "s" and "end".
4875 * While we were blocked and the map was unlocked, some
4876 * new memory entries could have been re-allocated between
4877 * "start" and "s" and we don't want to mess with those.
4878 * Some of those entries could even have been re-assembled
4879 * with an entry after "s" (in vm_map_simplify_entry()), so
4880 * we may have to vm_map_clip_start() again.
4883 if (entry
->vme_start
>= s
) {
4885 * This entry starts on or after "s"
4886 * so no need to clip its start.
4890 * This entry has been re-assembled by a
4891 * vm_map_simplify_entry(). We need to
4892 * re-clip its start.
4894 vm_map_clip_start(map
, entry
, s
);
4896 if (entry
->vme_end
<= end
) {
4898 * This entry is going away completely, so no need
4899 * to clip and possibly cause an unnecessary unnesting.
4902 vm_map_clip_end(map
, entry
, end
);
4905 if (entry
->permanent
) {
4906 panic("attempt to remove permanent VM map entry "
4907 "%p [0x%llx:0x%llx]\n",
4908 entry
, (uint64_t) s
, (uint64_t) end
);
4912 if (entry
->in_transition
) {
4913 wait_result_t wait_result
;
4916 * Another thread is wiring/unwiring this entry.
4917 * Let the other thread know we are waiting.
4919 assert(s
== entry
->vme_start
);
4920 entry
->needs_wakeup
= TRUE
;
4923 * wake up anybody waiting on entries that we have
4924 * already unwired/deleted.
4927 vm_map_entry_wakeup(map
);
4928 need_wakeup
= FALSE
;
4931 wait_result
= vm_map_entry_wait(map
, interruptible
);
4933 if (interruptible
&&
4934 wait_result
== THREAD_INTERRUPTED
) {
4936 * We do not clear the needs_wakeup flag,
4937 * since we cannot tell if we were the only one.
4940 return KERN_ABORTED
;
4944 * The entry could have been clipped or it
4945 * may not exist anymore. Look it up again.
4947 if (!vm_map_lookup_entry(map
, s
, &first_entry
)) {
4948 assert((map
!= kernel_map
) &&
4949 (!entry
->is_sub_map
));
4951 * User: use the next entry
4953 entry
= first_entry
->vme_next
;
4954 s
= entry
->vme_start
;
4956 entry
= first_entry
;
4957 SAVE_HINT_MAP_WRITE(map
, entry
->vme_prev
);
4959 last_timestamp
= map
->timestamp
;
4961 } /* end in_transition */
4963 if (entry
->wired_count
) {
4964 boolean_t user_wire
;
4966 user_wire
= entry
->user_wired_count
> 0;
4969 * Remove a kernel wiring if requested
4971 if (flags
& VM_MAP_REMOVE_KUNWIRE
) {
4972 entry
->wired_count
--;
4976 * Remove all user wirings for proper accounting
4978 if (entry
->user_wired_count
> 0) {
4979 while (entry
->user_wired_count
)
4980 subtract_wire_counts(map
, entry
, user_wire
);
4983 if (entry
->wired_count
!= 0) {
4984 assert(map
!= kernel_map
);
				 * Cannot continue.  Typical case is when
				 * a user thread has physical io pending
				 * on this page.  Either wait for the
				 * kernel wiring to go away or return an
4992 if (flags
& VM_MAP_REMOVE_WAIT_FOR_KWIRE
) {
4993 wait_result_t wait_result
;
4995 assert(s
== entry
->vme_start
);
4996 entry
->needs_wakeup
= TRUE
;
4997 wait_result
= vm_map_entry_wait(map
,
5000 if (interruptible
&&
5001 wait_result
== THREAD_INTERRUPTED
) {
5003 * We do not clear the
5004 * needs_wakeup flag, since we
5005 * cannot tell if we were the
5009 return KERN_ABORTED
;
5013 * The entry could have been clipped or
5014 * it may not exist anymore. Look it
5017 if (!vm_map_lookup_entry(map
, s
,
5019 assert(map
!= kernel_map
);
5021 * User: use the next entry
5023 entry
= first_entry
->vme_next
;
5024 s
= entry
->vme_start
;
5026 entry
= first_entry
;
5027 SAVE_HINT_MAP_WRITE(map
, entry
->vme_prev
);
5029 last_timestamp
= map
->timestamp
;
5033 return KERN_FAILURE
;
5037 entry
->in_transition
= TRUE
;
5039 * copy current entry. see comment in vm_map_wire()
5042 assert(s
== entry
->vme_start
);
			 * We can unlock the map now.  The in_transition
			 * state guarantees existence of the entry.
5050 if (tmp_entry
.is_sub_map
) {
5052 vm_map_offset_t sub_start
, sub_end
;
5054 vm_map_offset_t pmap_addr
;
5057 sub_map
= tmp_entry
.object
.sub_map
;
5058 sub_start
= tmp_entry
.offset
;
5059 sub_end
= sub_start
+ (tmp_entry
.vme_end
-
5060 tmp_entry
.vme_start
);
5061 if (tmp_entry
.use_pmap
) {
5062 pmap
= sub_map
->pmap
;
5063 pmap_addr
= tmp_entry
.vme_start
;
5066 pmap_addr
= tmp_entry
.vme_start
;
5068 (void) vm_map_unwire_nested(sub_map
,
5074 vm_fault_unwire(map
, &tmp_entry
,
5075 tmp_entry
.object
.vm_object
== kernel_object
,
5076 map
->pmap
, tmp_entry
.vme_start
);
5081 if (last_timestamp
+1 != map
->timestamp
) {
5083 * Find the entry again. It could have
5084 * been clipped after we unlocked the map.
5086 if (!vm_map_lookup_entry(map
, s
, &first_entry
)){
5087 assert((map
!= kernel_map
) &&
5088 (!entry
->is_sub_map
));
5089 first_entry
= first_entry
->vme_next
;
5090 s
= first_entry
->vme_start
;
5092 SAVE_HINT_MAP_WRITE(map
, entry
->vme_prev
);
5095 SAVE_HINT_MAP_WRITE(map
, entry
->vme_prev
);
5096 first_entry
= entry
;
5099 last_timestamp
= map
->timestamp
;
5101 entry
= first_entry
;
5102 while ((entry
!= vm_map_to_entry(map
)) &&
5103 (entry
->vme_start
< tmp_entry
.vme_end
)) {
5104 assert(entry
->in_transition
);
5105 entry
->in_transition
= FALSE
;
5106 if (entry
->needs_wakeup
) {
5107 entry
->needs_wakeup
= FALSE
;
5110 entry
= entry
->vme_next
;
5113 * We have unwired the entry(s). Go back and
5116 entry
= first_entry
;
5120 /* entry is unwired */
5121 assert(entry
->wired_count
== 0);
5122 assert(entry
->user_wired_count
== 0);
5124 assert(s
== entry
->vme_start
);
5126 if (flags
& VM_MAP_REMOVE_NO_PMAP_CLEANUP
) {
5128 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
5129 * vm_map_delete(), some map entries might have been
5130 * transferred to a "zap_map", which doesn't have a
5131 * pmap. The original pmap has already been flushed
5132 * in the vm_map_delete() call targeting the original
5133 * map, but when we get to destroying the "zap_map",
5134 * we don't have any pmap to flush, so let's just skip
5137 } else if (entry
->is_sub_map
) {
5138 if (entry
->use_pmap
) {
5139 #ifndef NO_NESTED_PMAP
5140 pmap_unnest(map
->pmap
,
5141 (addr64_t
)entry
->vme_start
,
5142 entry
->vme_end
- entry
->vme_start
);
5143 #endif /* NO_NESTED_PMAP */
5144 if ((map
->mapped
) && (map
->ref_count
)) {
5145 /* clean up parent map/maps */
5146 vm_map_submap_pmap_clean(
5147 map
, entry
->vme_start
,
5149 entry
->object
.sub_map
,
5153 vm_map_submap_pmap_clean(
5154 map
, entry
->vme_start
, entry
->vme_end
,
5155 entry
->object
.sub_map
,
5158 } else if (entry
->object
.vm_object
!= kernel_object
) {
5159 object
= entry
->object
.vm_object
;
5160 if((map
->mapped
) && (map
->ref_count
)) {
5161 vm_object_pmap_protect(
5162 object
, entry
->offset
,
5163 entry
->vme_end
- entry
->vme_start
,
5168 pmap_remove(map
->pmap
,
5169 (addr64_t
)entry
->vme_start
,
5170 (addr64_t
)entry
->vme_end
);
5175 * All pmap mappings for this map entry must have been
5178 assert(vm_map_pmap_is_empty(map
,
5182 next
= entry
->vme_next
;
5183 s
= next
->vme_start
;
5184 last_timestamp
= map
->timestamp
;
5186 if ((flags
& VM_MAP_REMOVE_SAVE_ENTRIES
) &&
5187 zap_map
!= VM_MAP_NULL
) {
5188 vm_map_size_t entry_size
;
5190 * The caller wants to save the affected VM map entries
5191 * into the "zap_map". The caller will take care of
5194 /* unlink the entry from "map" ... */
5195 vm_map_entry_unlink(map
, entry
);
5196 /* ... and add it to the end of the "zap_map" */
5197 vm_map_entry_link(zap_map
,
5198 vm_map_last_entry(zap_map
),
5200 entry_size
= entry
->vme_end
- entry
->vme_start
;
5201 map
->size
-= entry_size
;
5202 zap_map
->size
+= entry_size
;
5203 /* we didn't unlock the map, so no timestamp increase */
5206 vm_map_entry_delete(map
, entry
);
5207 /* vm_map_entry_delete unlocks the map */
5213 if(entry
== vm_map_to_entry(map
)) {
5216 if (last_timestamp
+1 != map
->timestamp
) {
			 * We are responsible for deleting everything
			 * from the given space.  If someone has interfered,
			 * we pick up where we left off; back-fills should
			 * be all right for anyone except map_delete, and
			 * we have to assume that the task has been fully
			 * disabled before we get here.
5225 if (!vm_map_lookup_entry(map
, s
, &entry
)){
5226 entry
= entry
->vme_next
;
5227 s
= entry
->vme_start
;
5229 SAVE_HINT_MAP_WRITE(map
, entry
->vme_prev
);
5232 * others can not only allocate behind us, we can
5233 * also see coalesce while we don't have the map lock
5235 if(entry
== vm_map_to_entry(map
)) {
5239 last_timestamp
= map
->timestamp
;
5242 if (map
->wait_for_space
)
5243 thread_wakeup((event_t
) map
);
5245 * wake up anybody waiting on entries that we have already deleted.
5248 vm_map_entry_wakeup(map
);
5250 return KERN_SUCCESS
;
}

/*
 *	vm_map_remove:
 *
 *	Remove the given address range from the target map.
 *	This is the exported form of vm_map_delete.
 */
kern_return_t
vm_map_remove(
	register vm_map_t		map,
	register vm_map_offset_t	start,
	register vm_map_offset_t	end,
	register boolean_t		flags)
{
	register kern_return_t	result;

	vm_map_lock(map);
	VM_MAP_RANGE_CHECK(map, start, end);
	result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
	vm_map_unlock(map);

	return(result);
}
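
/*
 * Usage sketch (hypothetical caller, not taken from this file): a
 * typical removal of a page-aligned range with no special flags would
 * look like
 *
 *	kr = vm_map_remove(map,
 *			   vm_map_trunc_page(addr),
 *			   vm_map_round_page(addr + size),
 *			   VM_MAP_NO_FLAGS);
 *
 * The range check and the actual work are done by vm_map_delete()
 * with the map write-locked.
 */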
/*
 *	Routine:	vm_map_copy_discard
 *
 *	Description:
 *		Dispose of a map copy object (returned by
 *		vm_map_copyin).
 */
void
vm_map_copy_discard(
	vm_map_copy_t	copy)
{
	if (copy == VM_MAP_COPY_NULL)
		return;

	switch (copy->type) {
	case VM_MAP_COPY_ENTRY_LIST:
		while (vm_map_copy_first_entry(copy) !=
		       vm_map_copy_to_entry(copy)) {
			vm_map_entry_t	entry = vm_map_copy_first_entry(copy);

			vm_map_copy_entry_unlink(copy, entry);
			vm_object_deallocate(entry->object.vm_object);
			vm_map_copy_entry_dispose(copy, entry);
		}
		break;
	case VM_MAP_COPY_OBJECT:
		vm_object_deallocate(copy->cpy_object);
		break;
	case VM_MAP_COPY_KERNEL_BUFFER:

		/*
		 * The vm_map_copy_t and possibly the data buffer were
		 * allocated by a single call to kalloc(), i.e. the
		 * vm_map_copy_t was not allocated out of the zone.
		 */
		kfree(copy, copy->cpy_kalloc_size);
		return;
	}
	zfree(vm_map_copy_zone, copy);
}
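
/*
 * Added note: for the VM_MAP_COPY_KERNEL_BUFFER case above, the copy
 * header and its data were obtained from a single kalloc() (see
 * vm_map_copyin_kernel_buffer() below), which is why the whole thing is
 * released with kfree() rather than returned to vm_map_copy_zone.
 */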
/*
 *	Routine:	vm_map_copy_copy
 *
 *	Description:
 *		Move the information in a map copy object to
 *		a new map copy object, leaving the old one
 *		empty.
 *
 *		This is used by kernel routines that need
 *		to look at out-of-line data (in copyin form)
 *		before deciding whether to return SUCCESS.
 *		If the routine returns FAILURE, the original
 *		copy object will be deallocated; therefore,
 *		these routines must make a copy of the copy
 *		object and leave the original empty so that
 *		deallocation will not fail.
 */
vm_map_copy_t
vm_map_copy_copy(
	vm_map_copy_t	copy)
{
	vm_map_copy_t	new_copy;

	if (copy == VM_MAP_COPY_NULL)
		return VM_MAP_COPY_NULL;

	/*
	 * Allocate a new copy object, and copy the information
	 * from the old one into it.
	 */

	new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
	*new_copy = *copy;

	if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
		/*
		 * The links in the entry chain must be
		 * changed to point to the new copy object.
		 */
		vm_map_copy_first_entry(copy)->vme_prev
			= vm_map_copy_to_entry(new_copy);
		vm_map_copy_last_entry(copy)->vme_next
			= vm_map_copy_to_entry(new_copy);
	}

	/*
	 * Change the old copy object into one that contains
	 * nothing to be deallocated.
	 */
	copy->type = VM_MAP_COPY_OBJECT;
	copy->cpy_object = VM_OBJECT_NULL;

	/*
	 * Return the new object.
	 */
	return new_copy;
}
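
/*
 * Usage sketch (hypothetical, illustrating the pattern described above):
 * take a private copy of the copy object before an operation that may
 * fail, so the original can still be returned or discarded cleanly.
 * some_operation_that_may_consume() is a placeholder, not a real routine.
 *
 *	vm_map_copy_t saved = vm_map_copy_copy(copy);
 *	kr = some_operation_that_may_consume(saved);
 *	if (kr != KERN_SUCCESS)
 *		vm_map_copy_discard(saved);
 */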
5376 static kern_return_t
5377 vm_map_overwrite_submap_recurse(
5379 vm_map_offset_t dst_addr
,
5380 vm_map_size_t dst_size
)
5382 vm_map_offset_t dst_end
;
5383 vm_map_entry_t tmp_entry
;
5384 vm_map_entry_t entry
;
5385 kern_return_t result
;
5386 boolean_t encountered_sub_map
= FALSE
;
5391 * Verify that the destination is all writeable
5392 * initially. We have to trunc the destination
5393 * address and round the copy size or we'll end up
5394 * splitting entries in strange ways.
5397 dst_end
= vm_map_round_page(dst_addr
+ dst_size
);
5398 vm_map_lock(dst_map
);
5401 if (!vm_map_lookup_entry(dst_map
, dst_addr
, &tmp_entry
)) {
5402 vm_map_unlock(dst_map
);
5403 return(KERN_INVALID_ADDRESS
);
5406 vm_map_clip_start(dst_map
, tmp_entry
, vm_map_trunc_page(dst_addr
));
5407 assert(!tmp_entry
->use_pmap
); /* clipping did unnest if needed */
5409 for (entry
= tmp_entry
;;) {
5410 vm_map_entry_t next
;
5412 next
= entry
->vme_next
;
5413 while(entry
->is_sub_map
) {
5414 vm_map_offset_t sub_start
;
5415 vm_map_offset_t sub_end
;
5416 vm_map_offset_t local_end
;
5418 if (entry
->in_transition
) {
5420 * Say that we are waiting, and wait for entry.
5422 entry
->needs_wakeup
= TRUE
;
5423 vm_map_entry_wait(dst_map
, THREAD_UNINT
);
5428 encountered_sub_map
= TRUE
;
5429 sub_start
= entry
->offset
;
5431 if(entry
->vme_end
< dst_end
)
5432 sub_end
= entry
->vme_end
;
5435 sub_end
-= entry
->vme_start
;
5436 sub_end
+= entry
->offset
;
5437 local_end
= entry
->vme_end
;
5438 vm_map_unlock(dst_map
);
5440 result
= vm_map_overwrite_submap_recurse(
5441 entry
->object
.sub_map
,
5443 sub_end
- sub_start
);
5445 if(result
!= KERN_SUCCESS
)
5447 if (dst_end
<= entry
->vme_end
)
5448 return KERN_SUCCESS
;
5449 vm_map_lock(dst_map
);
5450 if(!vm_map_lookup_entry(dst_map
, local_end
,
5452 vm_map_unlock(dst_map
);
5453 return(KERN_INVALID_ADDRESS
);
5456 next
= entry
->vme_next
;
5459 if ( ! (entry
->protection
& VM_PROT_WRITE
)) {
5460 vm_map_unlock(dst_map
);
5461 return(KERN_PROTECTION_FAILURE
);
5465 * If the entry is in transition, we must wait
5466 * for it to exit that state. Anything could happen
5467 * when we unlock the map, so start over.
5469 if (entry
->in_transition
) {
5472 * Say that we are waiting, and wait for entry.
5474 entry
->needs_wakeup
= TRUE
;
5475 vm_map_entry_wait(dst_map
, THREAD_UNINT
);
5481 * our range is contained completely within this map entry
5483 if (dst_end
<= entry
->vme_end
) {
5484 vm_map_unlock(dst_map
);
5485 return KERN_SUCCESS
;
5488 * check that range specified is contiguous region
5490 if ((next
== vm_map_to_entry(dst_map
)) ||
5491 (next
->vme_start
!= entry
->vme_end
)) {
5492 vm_map_unlock(dst_map
);
5493 return(KERN_INVALID_ADDRESS
);
5497 * Check for permanent objects in the destination.
5499 if ((entry
->object
.vm_object
!= VM_OBJECT_NULL
) &&
5500 ((!entry
->object
.vm_object
->internal
) ||
5501 (entry
->object
.vm_object
->true_share
))) {
5502 if(encountered_sub_map
) {
5503 vm_map_unlock(dst_map
);
5504 return(KERN_FAILURE
);
5511 vm_map_unlock(dst_map
);
5512 return(KERN_SUCCESS
);
/*
 *	Routine:	vm_map_copy_overwrite
 *
 *	Description:
 *		Copy the memory described by the map copy
 *		object (copy; returned by vm_map_copyin) onto
 *		the specified destination region (dst_map, dst_addr).
 *		The destination must be writeable.
 *
 *		Unlike vm_map_copyout, this routine actually
 *		writes over previously-mapped memory.  If the
 *		previous mapping was to a permanent (user-supplied)
 *		memory object, it is preserved.
 *
 *		The attributes (protection and inheritance) of the
 *		destination region are preserved.
 *
 *		If successful, consumes the copy object.
 *		Otherwise, the caller is responsible for it.
 *
 *	Implementation notes:
 *		To overwrite aligned temporary virtual memory, it is
 *		sufficient to remove the previous mapping and insert
 *		the new copy.  This replacement is done either on
 *		the whole region (if no permanent virtual memory
 *		objects are embedded in the destination region) or
 *		in individual map entries.
 *
 *		To overwrite permanent virtual memory, it is necessary
 *		to copy each page, as the external memory management
 *		interface currently does not provide any optimizations.
 *
 *		Unaligned memory also has to be copied.  It is possible
 *		to use 'vm_trickery' to copy the aligned data.  This is
 *		not done but not hard to implement.
 *
 *		Once a page of permanent memory has been overwritten,
 *		it is impossible to interrupt this function; otherwise,
 *		the call would be neither atomic nor location-independent.
 *		The kernel-state portion of a user thread must be
 *		interruptible.
 *
 *		It may be expensive to forward all requests that might
 *		overwrite permanent memory (vm_write, vm_copy) to
 *		uninterruptible kernel threads.  This routine may be
 *		called by interruptible threads; however, success is
 *		not guaranteed -- if the request cannot be performed
 *		atomically and interruptibly, an error indication is
 *		returned.
 */
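
/*
 * Added note: the two passes referred to above are implemented by
 * vm_map_copy_overwrite_aligned() and vm_map_copy_overwrite_unaligned()
 * further down in this file.  Page-aligned entries are replaced
 * wholesale, while unaligned portions are physically copied via
 * vm_fault_copy().
 */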
static kern_return_t
vm_map_copy_overwrite_nested(
	vm_map_t		dst_map,
	vm_map_address_t	dst_addr,
	vm_map_copy_t		copy,
	boolean_t		interruptible,
	pmap_t			pmap)
{
	vm_map_offset_t		dst_end;
	vm_map_entry_t		tmp_entry;
	vm_map_entry_t		entry;
	kern_return_t		kr;
	boolean_t		aligned = TRUE;
	boolean_t		contains_permanent_objects = FALSE;
	boolean_t		encountered_sub_map = FALSE;
	vm_map_offset_t		base_addr;
	vm_map_size_t		copy_size;
	vm_map_size_t		total_size;
5587 * Check for null copy object.
5590 if (copy
== VM_MAP_COPY_NULL
)
5591 return(KERN_SUCCESS
);
5594 * Check for special kernel buffer allocated
5595 * by new_ipc_kmsg_copyin.
5598 if (copy
->type
== VM_MAP_COPY_KERNEL_BUFFER
) {
5599 return(vm_map_copyout_kernel_buffer(
5605 * Only works for entry lists at the moment. Will
5606 * support page lists later.
5609 assert(copy
->type
== VM_MAP_COPY_ENTRY_LIST
);
5611 if (copy
->size
== 0) {
5612 vm_map_copy_discard(copy
);
5613 return(KERN_SUCCESS
);
5617 * Verify that the destination is all writeable
5618 * initially. We have to trunc the destination
5619 * address and round the copy size or we'll end up
5620 * splitting entries in strange ways.
5623 if (!page_aligned(copy
->size
) ||
5624 !page_aligned (copy
->offset
) ||
5625 !page_aligned (dst_addr
))
5628 dst_end
= vm_map_round_page(dst_addr
+ copy
->size
);
5630 dst_end
= dst_addr
+ copy
->size
;
5633 vm_map_lock(dst_map
);
5635 /* LP64todo - remove this check when vm_map_commpage64()
5636 * no longer has to stuff in a map_entry for the commpage
5637 * above the map's max_offset.
5639 if (dst_addr
>= dst_map
->max_offset
) {
5640 vm_map_unlock(dst_map
);
5641 return(KERN_INVALID_ADDRESS
);
5645 if (!vm_map_lookup_entry(dst_map
, dst_addr
, &tmp_entry
)) {
5646 vm_map_unlock(dst_map
);
5647 return(KERN_INVALID_ADDRESS
);
5649 vm_map_clip_start(dst_map
, tmp_entry
, vm_map_trunc_page(dst_addr
));
5650 for (entry
= tmp_entry
;;) {
5651 vm_map_entry_t next
= entry
->vme_next
;
5653 while(entry
->is_sub_map
) {
5654 vm_map_offset_t sub_start
;
5655 vm_map_offset_t sub_end
;
5656 vm_map_offset_t local_end
;
5658 if (entry
->in_transition
) {
5661 * Say that we are waiting, and wait for entry.
5663 entry
->needs_wakeup
= TRUE
;
5664 vm_map_entry_wait(dst_map
, THREAD_UNINT
);
5669 local_end
= entry
->vme_end
;
5670 if (!(entry
->needs_copy
)) {
5671 /* if needs_copy we are a COW submap */
5672 /* in such a case we just replace so */
5673 /* there is no need for the follow- */
5675 encountered_sub_map
= TRUE
;
5676 sub_start
= entry
->offset
;
5678 if(entry
->vme_end
< dst_end
)
5679 sub_end
= entry
->vme_end
;
5682 sub_end
-= entry
->vme_start
;
5683 sub_end
+= entry
->offset
;
5684 vm_map_unlock(dst_map
);
5686 kr
= vm_map_overwrite_submap_recurse(
5687 entry
->object
.sub_map
,
5689 sub_end
- sub_start
);
5690 if(kr
!= KERN_SUCCESS
)
5692 vm_map_lock(dst_map
);
5695 if (dst_end
<= entry
->vme_end
)
5696 goto start_overwrite
;
5697 if(!vm_map_lookup_entry(dst_map
, local_end
,
5699 vm_map_unlock(dst_map
);
5700 return(KERN_INVALID_ADDRESS
);
5702 next
= entry
->vme_next
;
5705 if ( ! (entry
->protection
& VM_PROT_WRITE
)) {
5706 vm_map_unlock(dst_map
);
5707 return(KERN_PROTECTION_FAILURE
);
5711 * If the entry is in transition, we must wait
5712 * for it to exit that state. Anything could happen
5713 * when we unlock the map, so start over.
5715 if (entry
->in_transition
) {
5718 * Say that we are waiting, and wait for entry.
5720 entry
->needs_wakeup
= TRUE
;
5721 vm_map_entry_wait(dst_map
, THREAD_UNINT
);
5727 * our range is contained completely within this map entry
5729 if (dst_end
<= entry
->vme_end
)
5732 * check that range specified is contiguous region
5734 if ((next
== vm_map_to_entry(dst_map
)) ||
5735 (next
->vme_start
!= entry
->vme_end
)) {
5736 vm_map_unlock(dst_map
);
5737 return(KERN_INVALID_ADDRESS
);
5742 * Check for permanent objects in the destination.
5744 if ((entry
->object
.vm_object
!= VM_OBJECT_NULL
) &&
5745 ((!entry
->object
.vm_object
->internal
) ||
5746 (entry
->object
.vm_object
->true_share
))) {
5747 contains_permanent_objects
= TRUE
;
5755 * If there are permanent objects in the destination, then
5756 * the copy cannot be interrupted.
5759 if (interruptible
&& contains_permanent_objects
) {
5760 vm_map_unlock(dst_map
);
5761 return(KERN_FAILURE
); /* XXX */
5766 * Make a second pass, overwriting the data
5767 * At the beginning of each loop iteration,
5768 * the next entry to be overwritten is "tmp_entry"
5769 * (initially, the value returned from the lookup above),
5770 * and the starting address expected in that entry
5774 total_size
= copy
->size
;
5775 if(encountered_sub_map
) {
5777 /* re-calculate tmp_entry since we've had the map */
5779 if (!vm_map_lookup_entry( dst_map
, dst_addr
, &tmp_entry
)) {
5780 vm_map_unlock(dst_map
);
5781 return(KERN_INVALID_ADDRESS
);
5784 copy_size
= copy
->size
;
5787 base_addr
= dst_addr
;
5789 /* deconstruct the copy object and do in parts */
5790 /* only in sub_map, interruptable case */
5791 vm_map_entry_t copy_entry
;
5792 vm_map_entry_t previous_prev
= VM_MAP_ENTRY_NULL
;
5793 vm_map_entry_t next_copy
= VM_MAP_ENTRY_NULL
;
5795 int remaining_entries
= 0;
5796 vm_map_offset_t new_offset
= 0;
5798 for (entry
= tmp_entry
; copy_size
== 0;) {
5799 vm_map_entry_t next
;
5801 next
= entry
->vme_next
;
			/* tmp_entry and base address are moved along */
			/* each time we encounter a sub-map.  Otherwise */
			/* entry can outpace tmp_entry, and the copy_size */
			/* may reflect the distance between them. */
			/* If the current entry is found to be in transition, */
			/* we will start over at the beginning or at the last */
			/* encounter of a submap, as dictated by base_addr; */
			/* we will zero copy_size accordingly. */
5811 if (entry
->in_transition
) {
5813 * Say that we are waiting, and wait for entry.
5815 entry
->needs_wakeup
= TRUE
;
5816 vm_map_entry_wait(dst_map
, THREAD_UNINT
);
5818 if(!vm_map_lookup_entry(dst_map
, base_addr
,
5820 vm_map_unlock(dst_map
);
5821 return(KERN_INVALID_ADDRESS
);
5827 if(entry
->is_sub_map
) {
5828 vm_map_offset_t sub_start
;
5829 vm_map_offset_t sub_end
;
5830 vm_map_offset_t local_end
;
5832 if (entry
->needs_copy
) {
				/* if this is a COW submap, */
				/* just back the range with an */
				/* anonymous entry */
5836 if(entry
->vme_end
< dst_end
)
5837 sub_end
= entry
->vme_end
;
5840 if(entry
->vme_start
< base_addr
)
5841 sub_start
= base_addr
;
5843 sub_start
= entry
->vme_start
;
5845 dst_map
, entry
, sub_end
);
5847 dst_map
, entry
, sub_start
);
5848 assert(!entry
->use_pmap
);
5849 entry
->is_sub_map
= FALSE
;
5851 entry
->object
.sub_map
);
5852 entry
->object
.sub_map
= NULL
;
5853 entry
->is_shared
= FALSE
;
5854 entry
->needs_copy
= FALSE
;
5858 * We should propagate the protections
5859 * of the submap entry here instead
5860 * of forcing them to VM_PROT_ALL...
5861 * Or better yet, we should inherit
5862 * the protection of the copy_entry.
5864 entry
->protection
= VM_PROT_ALL
;
5865 entry
->max_protection
= VM_PROT_ALL
;
5866 entry
->wired_count
= 0;
5867 entry
->user_wired_count
= 0;
5868 if(entry
->inheritance
5869 == VM_INHERIT_SHARE
)
5870 entry
->inheritance
= VM_INHERIT_COPY
;
5873 /* first take care of any non-sub_map */
5874 /* entries to send */
5875 if(base_addr
< entry
->vme_start
) {
5878 entry
->vme_start
- base_addr
;
5881 sub_start
= entry
->offset
;
5883 if(entry
->vme_end
< dst_end
)
5884 sub_end
= entry
->vme_end
;
5887 sub_end
-= entry
->vme_start
;
5888 sub_end
+= entry
->offset
;
5889 local_end
= entry
->vme_end
;
5890 vm_map_unlock(dst_map
);
5891 copy_size
= sub_end
- sub_start
;
5893 /* adjust the copy object */
5894 if (total_size
> copy_size
) {
5895 vm_map_size_t local_size
= 0;
5896 vm_map_size_t entry_size
;
5899 new_offset
= copy
->offset
;
5900 copy_entry
= vm_map_copy_first_entry(copy
);
5902 vm_map_copy_to_entry(copy
)){
5903 entry_size
= copy_entry
->vme_end
-
5904 copy_entry
->vme_start
;
5905 if((local_size
< copy_size
) &&
5906 ((local_size
+ entry_size
)
5908 vm_map_copy_clip_end(copy
,
5910 copy_entry
->vme_start
+
5911 (copy_size
- local_size
));
5912 entry_size
= copy_entry
->vme_end
-
5913 copy_entry
->vme_start
;
5914 local_size
+= entry_size
;
5915 new_offset
+= entry_size
;
5917 if(local_size
>= copy_size
) {
5918 next_copy
= copy_entry
->vme_next
;
5919 copy_entry
->vme_next
=
5920 vm_map_copy_to_entry(copy
);
5922 copy
->cpy_hdr
.links
.prev
;
5923 copy
->cpy_hdr
.links
.prev
= copy_entry
;
5924 copy
->size
= copy_size
;
5926 copy
->cpy_hdr
.nentries
;
5927 remaining_entries
-= nentries
;
5928 copy
->cpy_hdr
.nentries
= nentries
;
5931 local_size
+= entry_size
;
5932 new_offset
+= entry_size
;
5935 copy_entry
= copy_entry
->vme_next
;
5939 if((entry
->use_pmap
) && (pmap
== NULL
)) {
5940 kr
= vm_map_copy_overwrite_nested(
5941 entry
->object
.sub_map
,
5945 entry
->object
.sub_map
->pmap
);
5946 } else if (pmap
!= NULL
) {
5947 kr
= vm_map_copy_overwrite_nested(
5948 entry
->object
.sub_map
,
5951 interruptible
, pmap
);
5953 kr
= vm_map_copy_overwrite_nested(
5954 entry
->object
.sub_map
,
5960 if(kr
!= KERN_SUCCESS
) {
5961 if(next_copy
!= NULL
) {
5962 copy
->cpy_hdr
.nentries
+=
5964 copy
->cpy_hdr
.links
.prev
->vme_next
=
5966 copy
->cpy_hdr
.links
.prev
5968 copy
->size
= total_size
;
5972 if (dst_end
<= local_end
) {
5973 return(KERN_SUCCESS
);
5975 /* otherwise copy no longer exists, it was */
5976 /* destroyed after successful copy_overwrite */
5977 copy
= (vm_map_copy_t
)
5978 zalloc(vm_map_copy_zone
);
5979 vm_map_copy_first_entry(copy
) =
5980 vm_map_copy_last_entry(copy
) =
5981 vm_map_copy_to_entry(copy
);
5982 copy
->type
= VM_MAP_COPY_ENTRY_LIST
;
5983 copy
->offset
= new_offset
;
5985 total_size
-= copy_size
;
5987 /* put back remainder of copy in container */
5988 if(next_copy
!= NULL
) {
5989 copy
->cpy_hdr
.nentries
= remaining_entries
;
5990 copy
->cpy_hdr
.links
.next
= next_copy
;
5991 copy
->cpy_hdr
.links
.prev
= previous_prev
;
5992 copy
->size
= total_size
;
5993 next_copy
->vme_prev
=
5994 vm_map_copy_to_entry(copy
);
5997 base_addr
= local_end
;
5998 vm_map_lock(dst_map
);
5999 if(!vm_map_lookup_entry(dst_map
,
6000 local_end
, &tmp_entry
)) {
6001 vm_map_unlock(dst_map
);
6002 return(KERN_INVALID_ADDRESS
);
6007 if (dst_end
<= entry
->vme_end
) {
6008 copy_size
= dst_end
- base_addr
;
6012 if ((next
== vm_map_to_entry(dst_map
)) ||
6013 (next
->vme_start
!= entry
->vme_end
)) {
6014 vm_map_unlock(dst_map
);
6015 return(KERN_INVALID_ADDRESS
);
6024 /* adjust the copy object */
6025 if (total_size
> copy_size
) {
6026 vm_map_size_t local_size
= 0;
6027 vm_map_size_t entry_size
;
6029 new_offset
= copy
->offset
;
6030 copy_entry
= vm_map_copy_first_entry(copy
);
6031 while(copy_entry
!= vm_map_copy_to_entry(copy
)) {
6032 entry_size
= copy_entry
->vme_end
-
6033 copy_entry
->vme_start
;
6034 if((local_size
< copy_size
) &&
6035 ((local_size
+ entry_size
)
6037 vm_map_copy_clip_end(copy
, copy_entry
,
6038 copy_entry
->vme_start
+
6039 (copy_size
- local_size
));
6040 entry_size
= copy_entry
->vme_end
-
6041 copy_entry
->vme_start
;
6042 local_size
+= entry_size
;
6043 new_offset
+= entry_size
;
6045 if(local_size
>= copy_size
) {
6046 next_copy
= copy_entry
->vme_next
;
6047 copy_entry
->vme_next
=
6048 vm_map_copy_to_entry(copy
);
6050 copy
->cpy_hdr
.links
.prev
;
6051 copy
->cpy_hdr
.links
.prev
= copy_entry
;
6052 copy
->size
= copy_size
;
6054 copy
->cpy_hdr
.nentries
;
6055 remaining_entries
-= nentries
;
6056 copy
->cpy_hdr
.nentries
= nentries
;
6059 local_size
+= entry_size
;
6060 new_offset
+= entry_size
;
6063 copy_entry
= copy_entry
->vme_next
;
6073 local_pmap
= dst_map
->pmap
;
6075 if ((kr
= vm_map_copy_overwrite_aligned(
6076 dst_map
, tmp_entry
, copy
,
6077 base_addr
, local_pmap
)) != KERN_SUCCESS
) {
6078 if(next_copy
!= NULL
) {
6079 copy
->cpy_hdr
.nentries
+=
6081 copy
->cpy_hdr
.links
.prev
->vme_next
=
6083 copy
->cpy_hdr
.links
.prev
=
6085 copy
->size
+= copy_size
;
6089 vm_map_unlock(dst_map
);
6094 * if the copy and dst address are misaligned but the same
6095 * offset within the page we can copy_not_aligned the
6096 * misaligned parts and copy aligned the rest. If they are
6097 * aligned but len is unaligned we simply need to copy
6098 * the end bit unaligned. We'll need to split the misaligned
6099 * bits of the region in this case !
6101 /* ALWAYS UNLOCKS THE dst_map MAP */
6102 if ((kr
= vm_map_copy_overwrite_unaligned( dst_map
,
6103 tmp_entry
, copy
, base_addr
)) != KERN_SUCCESS
) {
6104 if(next_copy
!= NULL
) {
6105 copy
->cpy_hdr
.nentries
+=
6107 copy
->cpy_hdr
.links
.prev
->vme_next
=
6109 copy
->cpy_hdr
.links
.prev
=
6111 copy
->size
+= copy_size
;
6116 total_size
-= copy_size
;
6119 base_addr
+= copy_size
;
6121 copy
->offset
= new_offset
;
6122 if(next_copy
!= NULL
) {
6123 copy
->cpy_hdr
.nentries
= remaining_entries
;
6124 copy
->cpy_hdr
.links
.next
= next_copy
;
6125 copy
->cpy_hdr
.links
.prev
= previous_prev
;
6126 next_copy
->vme_prev
= vm_map_copy_to_entry(copy
);
6127 copy
->size
= total_size
;
6129 vm_map_lock(dst_map
);
6131 if (!vm_map_lookup_entry(dst_map
,
6132 base_addr
, &tmp_entry
)) {
6133 vm_map_unlock(dst_map
);
6134 return(KERN_INVALID_ADDRESS
);
6136 if (tmp_entry
->in_transition
) {
6137 entry
->needs_wakeup
= TRUE
;
6138 vm_map_entry_wait(dst_map
, THREAD_UNINT
);
6143 vm_map_clip_start(dst_map
, tmp_entry
, vm_map_trunc_page(base_addr
));
	/*
	 *	Throw away the vm_map_copy object
	 */
	vm_map_copy_discard(copy);

	return(KERN_SUCCESS);
}/* vm_map_copy_overwrite */

kern_return_t
vm_map_copy_overwrite(
	vm_map_t	dst_map,
	vm_map_offset_t	dst_addr,
	vm_map_copy_t	copy,
	boolean_t	interruptible)
{
	return vm_map_copy_overwrite_nested(
		dst_map, dst_addr, copy, interruptible, (pmap_t) NULL);
}
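
/*
 * Usage sketch (hypothetical caller): the usual pairing is a copyin
 * followed by an overwrite of an existing, writeable destination range:
 *
 *	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
 *	if (kr == KERN_SUCCESS) {
 *		kr = vm_map_copy_overwrite(dst_map, dst_addr, copy, TRUE);
 *		if (kr != KERN_SUCCESS)
 *			vm_map_copy_discard(copy);
 *	}
 *
 * On success the copy object is consumed, as noted in the description
 * above.
 */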
/*
 *	Routine:	vm_map_copy_overwrite_unaligned	[internal use only]
 *
 *	Description:
 *		Physically copy unaligned data.
 *
 *	Implementation:
 *		Unaligned parts of pages have to be physically copied.  We
 *		use a modified form of vm_fault_copy (which understands
 *		non-aligned page offsets and sizes) to do the copy.  We
 *		attempt to copy as much memory in one go as possible;
 *		however, vm_fault_copy copies within one memory object, so
 *		we have to find the smallest of "amount left", "source
 *		object data size" and "target object data size".  With
 *		unaligned data we don't need to split regions, therefore
 *		the source (copy) object should be one map entry; the
 *		target range may be split over multiple map entries,
 *		however.  In any event we are pessimistic about these
 *		assumptions.
 *
 *	Assumptions:
 *		dst_map is locked on entry and is returned locked on
 *		success, unlocked on error.
 */
static kern_return_t
vm_map_copy_overwrite_unaligned(
	vm_map_t	dst_map,
	vm_map_entry_t	entry,
	vm_map_copy_t	copy,
	vm_map_offset_t	start)
6198 vm_map_entry_t copy_entry
= vm_map_copy_first_entry(copy
);
6199 vm_map_version_t version
;
6200 vm_object_t dst_object
;
6201 vm_object_offset_t dst_offset
;
6202 vm_object_offset_t src_offset
;
6203 vm_object_offset_t entry_offset
;
6204 vm_map_offset_t entry_end
;
6205 vm_map_size_t src_size
,
6209 kern_return_t kr
= KERN_SUCCESS
;
6211 vm_map_lock_write_to_read(dst_map
);
6213 src_offset
= copy
->offset
- vm_object_trunc_page(copy
->offset
);
6214 amount_left
= copy
->size
;
6216 * unaligned so we never clipped this entry, we need the offset into
6217 * the vm_object not just the data.
6219 while (amount_left
> 0) {
6221 if (entry
== vm_map_to_entry(dst_map
)) {
6222 vm_map_unlock_read(dst_map
);
6223 return KERN_INVALID_ADDRESS
;
6226 /* "start" must be within the current map entry */
6227 assert ((start
>=entry
->vme_start
) && (start
<entry
->vme_end
));
6229 dst_offset
= start
- entry
->vme_start
;
6231 dst_size
= entry
->vme_end
- start
;
6233 src_size
= copy_entry
->vme_end
-
6234 (copy_entry
->vme_start
+ src_offset
);
6236 if (dst_size
< src_size
) {
6238 * we can only copy dst_size bytes before
6239 * we have to get the next destination entry
6241 copy_size
= dst_size
;
6244 * we can only copy src_size bytes before
6245 * we have to get the next source copy entry
6247 copy_size
= src_size
;
6250 if (copy_size
> amount_left
) {
6251 copy_size
= amount_left
;
		 *	Entry needs copy; create a shadow object for the
		 *	copy-on-write region.
		 */
6257 if (entry
->needs_copy
&&
6258 ((entry
->protection
& VM_PROT_WRITE
) != 0))
6260 if (vm_map_lock_read_to_write(dst_map
)) {
6261 vm_map_lock_read(dst_map
);
6264 vm_object_shadow(&entry
->object
.vm_object
,
6266 (vm_map_size_t
)(entry
->vme_end
6267 - entry
->vme_start
));
6268 entry
->needs_copy
= FALSE
;
6269 vm_map_lock_write_to_read(dst_map
);
6271 dst_object
= entry
->object
.vm_object
;
6273 * unlike with the virtual (aligned) copy we're going
6274 * to fault on it therefore we need a target object.
6276 if (dst_object
== VM_OBJECT_NULL
) {
6277 if (vm_map_lock_read_to_write(dst_map
)) {
6278 vm_map_lock_read(dst_map
);
6281 dst_object
= vm_object_allocate((vm_map_size_t
)
6282 entry
->vme_end
- entry
->vme_start
);
6283 entry
->object
.vm_object
= dst_object
;
6285 vm_map_lock_write_to_read(dst_map
);
6288 * Take an object reference and unlock map. The "entry" may
6289 * disappear or change when the map is unlocked.
6291 vm_object_reference(dst_object
);
6292 version
.main_timestamp
= dst_map
->timestamp
;
6293 entry_offset
= entry
->offset
;
6294 entry_end
= entry
->vme_end
;
6295 vm_map_unlock_read(dst_map
);
6297 * Copy as much as possible in one pass
6300 copy_entry
->object
.vm_object
,
6301 copy_entry
->offset
+ src_offset
,
6304 entry_offset
+ dst_offset
,
6310 src_offset
+= copy_size
;
6311 amount_left
-= copy_size
;
6313 * Release the object reference
6315 vm_object_deallocate(dst_object
);
6317 * If a hard error occurred, return it now
6319 if (kr
!= KERN_SUCCESS
)
6322 if ((copy_entry
->vme_start
+ src_offset
) == copy_entry
->vme_end
6323 || amount_left
== 0)
6326 * all done with this copy entry, dispose.
6328 vm_map_copy_entry_unlink(copy
, copy_entry
);
6329 vm_object_deallocate(copy_entry
->object
.vm_object
);
6330 vm_map_copy_entry_dispose(copy
, copy_entry
);
6332 if ((copy_entry
= vm_map_copy_first_entry(copy
))
6333 == vm_map_copy_to_entry(copy
) && amount_left
) {
6335 * not finished copying but run out of source
6337 return KERN_INVALID_ADDRESS
;
6342 if (amount_left
== 0)
6343 return KERN_SUCCESS
;
6345 vm_map_lock_read(dst_map
);
6346 if (version
.main_timestamp
== dst_map
->timestamp
) {
6347 if (start
== entry_end
) {
6349 * destination region is split. Use the version
6350 * information to avoid a lookup in the normal
6353 entry
= entry
->vme_next
;
6355 * should be contiguous. Fail if we encounter
6356 * a hole in the destination.
6358 if (start
!= entry
->vme_start
) {
6359 vm_map_unlock_read(dst_map
);
6360 return KERN_INVALID_ADDRESS
;
6365 * Map version check failed.
6366 * we must lookup the entry because somebody
6367 * might have changed the map behind our backs.
6370 if (!vm_map_lookup_entry(dst_map
, start
, &entry
))
6372 vm_map_unlock_read(dst_map
);
6373 return KERN_INVALID_ADDRESS
;
6378 return KERN_SUCCESS
;
6379 }/* vm_map_copy_overwrite_unaligned */
/*
 *	Routine:	vm_map_copy_overwrite_aligned	[internal use only]
 *
 *	Description:
 *		Does all the vm_trickery possible for whole pages.
 *
 *	Implementation:
 *		If there are no permanent objects in the destination,
 *		and the source and destination map entry zones match,
 *		and the destination map entry is not shared,
 *		then the map entries can be deleted and replaced
 *		with those from the copy.  The following code is the
 *		basic idea of what to do, but there are lots of annoying
 *		little details about getting protection and inheritance
 *		right.  Should add protection, inheritance, and sharing checks
 *		to the above pass and make sure that no wiring is involved.
 */
static kern_return_t
vm_map_copy_overwrite_aligned(
	vm_map_t	dst_map,
	vm_map_entry_t	tmp_entry,
	vm_map_copy_t	copy,
	vm_map_offset_t	start,
	__unused pmap_t	pmap)
6409 vm_map_entry_t copy_entry
;
6410 vm_map_size_t copy_size
;
6412 vm_map_entry_t entry
;
6414 while ((copy_entry
= vm_map_copy_first_entry(copy
))
6415 != vm_map_copy_to_entry(copy
))
6417 copy_size
= (copy_entry
->vme_end
- copy_entry
->vme_start
);
6420 assert(!entry
->use_pmap
); /* unnested when clipped earlier */
6421 if (entry
== vm_map_to_entry(dst_map
)) {
6422 vm_map_unlock(dst_map
);
6423 return KERN_INVALID_ADDRESS
;
6425 size
= (entry
->vme_end
- entry
->vme_start
);
6427 * Make sure that no holes popped up in the
6428 * address map, and that the protection is
6429 * still valid, in case the map was unlocked
6433 if ((entry
->vme_start
!= start
) || ((entry
->is_sub_map
)
6434 && !entry
->needs_copy
)) {
6435 vm_map_unlock(dst_map
);
6436 return(KERN_INVALID_ADDRESS
);
6438 assert(entry
!= vm_map_to_entry(dst_map
));
6441 * Check protection again
6444 if ( ! (entry
->protection
& VM_PROT_WRITE
)) {
6445 vm_map_unlock(dst_map
);
6446 return(KERN_PROTECTION_FAILURE
);
6450 * Adjust to source size first
6453 if (copy_size
< size
) {
6454 vm_map_clip_end(dst_map
, entry
, entry
->vme_start
+ copy_size
);
6459 * Adjust to destination size
6462 if (size
< copy_size
) {
6463 vm_map_copy_clip_end(copy
, copy_entry
,
6464 copy_entry
->vme_start
+ size
);
6468 assert((entry
->vme_end
- entry
->vme_start
) == size
);
6469 assert((tmp_entry
->vme_end
- tmp_entry
->vme_start
) == size
);
6470 assert((copy_entry
->vme_end
- copy_entry
->vme_start
) == size
);
6473 * If the destination contains temporary unshared memory,
6474 * we can perform the copy by throwing it away and
6475 * installing the source data.
6478 object
= entry
->object
.vm_object
;
6479 if ((!entry
->is_shared
&&
6480 ((object
== VM_OBJECT_NULL
) ||
6481 (object
->internal
&& !object
->true_share
))) ||
6482 entry
->needs_copy
) {
6483 vm_object_t old_object
= entry
->object
.vm_object
;
6484 vm_object_offset_t old_offset
= entry
->offset
;
6485 vm_object_offset_t offset
;
6488 * Ensure that the source and destination aren't
6491 if (old_object
== copy_entry
->object
.vm_object
&&
6492 old_offset
== copy_entry
->offset
) {
6493 vm_map_copy_entry_unlink(copy
, copy_entry
);
6494 vm_map_copy_entry_dispose(copy
, copy_entry
);
6496 if (old_object
!= VM_OBJECT_NULL
)
6497 vm_object_deallocate(old_object
);
6499 start
= tmp_entry
->vme_end
;
6500 tmp_entry
= tmp_entry
->vme_next
;
6504 if (old_object
!= VM_OBJECT_NULL
) {
6505 if(entry
->is_sub_map
) {
6506 if(entry
->use_pmap
) {
6507 #ifndef NO_NESTED_PMAP
6508 pmap_unnest(dst_map
->pmap
,
6509 (addr64_t
)entry
->vme_start
,
6510 entry
->vme_end
- entry
->vme_start
);
6511 #endif /* NO_NESTED_PMAP */
6512 if(dst_map
->mapped
) {
6513 /* clean up parent */
6515 vm_map_submap_pmap_clean(
6516 dst_map
, entry
->vme_start
,
6518 entry
->object
.sub_map
,
6522 vm_map_submap_pmap_clean(
6523 dst_map
, entry
->vme_start
,
6525 entry
->object
.sub_map
,
6529 entry
->object
.sub_map
);
6531 if(dst_map
->mapped
) {
6532 vm_object_pmap_protect(
6533 entry
->object
.vm_object
,
6541 pmap_remove(dst_map
->pmap
,
6542 (addr64_t
)(entry
->vme_start
),
6543 (addr64_t
)(entry
->vme_end
));
6545 vm_object_deallocate(old_object
);
6549 entry
->is_sub_map
= FALSE
;
6550 entry
->object
= copy_entry
->object
;
6551 object
= entry
->object
.vm_object
;
6552 entry
->needs_copy
= copy_entry
->needs_copy
;
6553 entry
->wired_count
= 0;
6554 entry
->user_wired_count
= 0;
6555 offset
= entry
->offset
= copy_entry
->offset
;
6557 vm_map_copy_entry_unlink(copy
, copy_entry
);
6558 vm_map_copy_entry_dispose(copy
, copy_entry
);
6561 * we could try to push pages into the pmap at this point, BUT
6562 * this optimization only saved on average 2 us per page if ALL
6563 * the pages in the source were currently mapped
6564 * and ALL the pages in the dest were touched, if there were fewer
6565 * than 2/3 of the pages touched, this optimization actually cost more cycles
6566 * it also puts a lot of pressure on the pmap layer w/r to mapping structures
6570 * Set up for the next iteration. The map
6571 * has not been unlocked, so the next
6572 * address should be at the end of this
6573 * entry, and the next map entry should be
6574 * the one following it.
6577 start
= tmp_entry
->vme_end
;
6578 tmp_entry
= tmp_entry
->vme_next
;
6580 vm_map_version_t version
;
6581 vm_object_t dst_object
= entry
->object
.vm_object
;
6582 vm_object_offset_t dst_offset
= entry
->offset
;
6586 * Take an object reference, and record
6587 * the map version information so that the
6588 * map can be safely unlocked.
6591 vm_object_reference(dst_object
);
6593 /* account for unlock bumping up timestamp */
6594 version
.main_timestamp
= dst_map
->timestamp
+ 1;
6596 vm_map_unlock(dst_map
);
6599 * Copy as much as possible in one pass
6604 copy_entry
->object
.vm_object
,
6614 * Release the object reference
6617 vm_object_deallocate(dst_object
);
6620 * If a hard error occurred, return it now
6623 if (r
!= KERN_SUCCESS
)
6626 if (copy_size
!= 0) {
6628 * Dispose of the copied region
6631 vm_map_copy_clip_end(copy
, copy_entry
,
6632 copy_entry
->vme_start
+ copy_size
);
6633 vm_map_copy_entry_unlink(copy
, copy_entry
);
6634 vm_object_deallocate(copy_entry
->object
.vm_object
);
6635 vm_map_copy_entry_dispose(copy
, copy_entry
);
6639 * Pick up in the destination map where we left off.
6641 * Use the version information to avoid a lookup
6642 * in the normal case.
6646 vm_map_lock(dst_map
);
6647 if (version
.main_timestamp
== dst_map
->timestamp
) {
6648 /* We can safely use saved tmp_entry value */
6650 vm_map_clip_end(dst_map
, tmp_entry
, start
);
6651 tmp_entry
= tmp_entry
->vme_next
;
6653 /* Must do lookup of tmp_entry */
6655 if (!vm_map_lookup_entry(dst_map
, start
, &tmp_entry
)) {
6656 vm_map_unlock(dst_map
);
6657 return(KERN_INVALID_ADDRESS
);
6659 vm_map_clip_start(dst_map
, tmp_entry
, start
);
6664 return(KERN_SUCCESS
);
6665 }/* vm_map_copy_overwrite_aligned */
/*
 *	Routine:	vm_map_copyin_kernel_buffer	[internal use only]
 *
 *	Description:
 *		Copy in data to a kernel buffer from space in the
 *		source map.  The original space may be optionally
 *		deallocated.
 *
 *		If successful, returns a new copy object.
 */
static kern_return_t
vm_map_copyin_kernel_buffer(
	vm_map_t	src_map,
	vm_map_offset_t	src_addr,
	vm_map_size_t	len,
	boolean_t	src_destroy,
	vm_map_copy_t	*copy_result)
{
	kern_return_t	kr;
	vm_map_copy_t	copy;
	vm_size_t	kalloc_size;

	if ((vm_size_t) len != len) {
		/* "len" is too big and doesn't fit in a "vm_size_t" */
		return KERN_RESOURCE_SHORTAGE;
	}
	kalloc_size = (vm_size_t) (sizeof(struct vm_map_copy) + len);
	assert((vm_map_size_t) kalloc_size == sizeof (struct vm_map_copy) + len);

	copy = (vm_map_copy_t) kalloc(kalloc_size);
	if (copy == VM_MAP_COPY_NULL) {
		return KERN_RESOURCE_SHORTAGE;
	}
	copy->type = VM_MAP_COPY_KERNEL_BUFFER;
	copy->size = len;
	copy->offset = 0;
	copy->cpy_kdata = (void *) (copy + 1);
	copy->cpy_kalloc_size = kalloc_size;

	kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t) len);
	if (kr != KERN_SUCCESS) {
		kfree(copy, kalloc_size);
		return kr;
	}
	if (src_destroy) {
		(void) vm_map_remove(src_map, vm_map_trunc_page(src_addr),
				     vm_map_round_page(src_addr + len),
				     VM_MAP_REMOVE_INTERRUPTIBLE |
				     VM_MAP_REMOVE_WAIT_FOR_KWIRE |
				     (src_map == kernel_map) ?
				     VM_MAP_REMOVE_KUNWIRE : 0);
	}
	*copy_result = copy;
	return KERN_SUCCESS;
}
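
/*
 * Added note: the kernel-buffer copy is laid out as a single kalloc()
 * block -- the struct vm_map_copy header immediately followed by the
 * raw data -- which is why cpy_kdata is set to (copy + 1) above and the
 * whole object is later released with a single kfree() in
 * vm_map_copy_discard().
 */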
/*
 *	Routine:	vm_map_copyout_kernel_buffer	[internal use only]
 *
 *	Description:
 *		Copy out data from a kernel buffer into space in the
 *		destination map.  The space may be optionally dynamically
 *		allocated.
 *
 *		If successful, consumes the copy object.
 *		Otherwise, the caller is responsible for it.
 */
static int vm_map_copyout_kernel_buffer_failures = 0;
static kern_return_t
vm_map_copyout_kernel_buffer(
	vm_map_t		map,
	vm_map_address_t	*addr,	/* IN/OUT */
	vm_map_copy_t		copy,
	boolean_t		overwrite)
{
	kern_return_t	kr = KERN_SUCCESS;
	thread_t	thread = current_thread();

	if (!overwrite) {

		/*
		 * Allocate space in the target map for the data
		 */
		kr = vm_map_enter(map,
				  addr,
				  vm_map_round_page(copy->size),
				  (vm_map_offset_t) 0,
				  VM_FLAGS_ANYWHERE,
				  VM_OBJECT_NULL,
				  (vm_object_offset_t) 0,
				  FALSE,
				  VM_PROT_DEFAULT,
				  VM_PROT_ALL,
				  VM_INHERIT_DEFAULT);
		if (kr != KERN_SUCCESS)
			return kr;
	}

	/*
	 * Copyout the data from the kernel buffer to the target map.
	 */
	if (thread->map == map) {

		/*
		 * If the target map is the current map, just do
		 * the copy.
		 */
		assert((vm_size_t) copy->size == copy->size);
		if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
			kr = KERN_INVALID_ADDRESS;
		}
	}
	else {
		vm_map_t oldmap;

		/*
		 * If the target map is another map, assume the
		 * target's address space identity for the duration
		 * of the copy.
		 */
		vm_map_reference(map);
		oldmap = vm_map_switch(map);

		assert((vm_size_t) copy->size == copy->size);
		if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
			vm_map_copyout_kernel_buffer_failures++;
			kr = KERN_INVALID_ADDRESS;
		}

		(void) vm_map_switch(oldmap);
		vm_map_deallocate(map);
	}

	if (kr != KERN_SUCCESS) {
		/* the copy failed, clean up */
		if (!overwrite) {
			/*
			 * Deallocate the space we allocated in the target map.
			 */
			(void) vm_map_remove(map,
					     vm_map_trunc_page(*addr),
					     vm_map_round_page(*addr +
							       vm_map_round_page(copy->size)),
					     VM_MAP_NO_FLAGS);
		}
	} else {
		/* copy was successful, discard the copy structure */
		kfree(copy, copy->cpy_kalloc_size);
	}

	return kr;
}
/*
 *	Macro:		vm_map_copy_insert
 *
 *	Description:
 *		Link a copy chain ("copy") into a map at the
 *		specified location (after "where").
 *	Side effects:
 *		The copy chain is destroyed.
 *	Warning:
 *		The arguments are evaluated multiple times.
 */
#define	vm_map_copy_insert(map, where, copy)				\
MACRO_BEGIN								\
	vm_map_t VMCI_map;						\
	vm_map_entry_t VMCI_where;					\
	vm_map_copy_t VMCI_copy;					\
	VMCI_map = (map);						\
	VMCI_where = (where);						\
	VMCI_copy = (copy);						\
	((VMCI_where->vme_next)->vme_prev = vm_map_copy_last_entry(VMCI_copy))\
		->vme_next = (VMCI_where->vme_next);			\
	((VMCI_where)->vme_next = vm_map_copy_first_entry(VMCI_copy))	\
		->vme_prev = VMCI_where;				\
	VMCI_map->hdr.nentries += VMCI_copy->cpy_hdr.nentries;		\
	UPDATE_FIRST_FREE(VMCI_map, VMCI_map->first_free);		\
	zfree(vm_map_copy_zone, VMCI_copy);				\
MACRO_END
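
/*
 * Added note: since the macro arguments are expanded more than once (see
 * the warning above), callers should pass only simple expressions without
 * side effects, e.g. vm_map_copy_insert(dst_map, last, copy) as done in
 * vm_map_copyout() below.
 */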
/*
 *	Routine:	vm_map_copyout
 *
 *	Description:
 *		Copy out a copy chain ("copy") into newly-allocated
 *		space in the destination map.
 *
 *		If successful, consumes the copy object.
 *		Otherwise, the caller is responsible for it.
 */
kern_return_t
vm_map_copyout(
	vm_map_t		dst_map,
	vm_map_address_t	*dst_addr,	/* OUT */
	vm_map_copy_t		copy)
{
	vm_map_size_t		size;
	vm_map_size_t		adjustment;
	vm_map_offset_t		start;
	vm_object_offset_t	vm_copy_start;
	vm_map_entry_t		last;
	vm_map_entry_t		entry;
6875 * Check for null copy object.
6878 if (copy
== VM_MAP_COPY_NULL
) {
6880 return(KERN_SUCCESS
);
6884 * Check for special copy object, created
6885 * by vm_map_copyin_object.
6888 if (copy
->type
== VM_MAP_COPY_OBJECT
) {
6889 vm_object_t object
= copy
->cpy_object
;
6891 vm_object_offset_t offset
;
6893 offset
= vm_object_trunc_page(copy
->offset
);
6894 size
= vm_map_round_page(copy
->size
+
6895 (vm_map_size_t
)(copy
->offset
- offset
));
6897 kr
= vm_map_enter(dst_map
, dst_addr
, size
,
6898 (vm_map_offset_t
) 0, VM_FLAGS_ANYWHERE
,
6899 object
, offset
, FALSE
,
6900 VM_PROT_DEFAULT
, VM_PROT_ALL
,
6901 VM_INHERIT_DEFAULT
);
6902 if (kr
!= KERN_SUCCESS
)
6904 /* Account for non-pagealigned copy object */
6905 *dst_addr
+= (vm_map_offset_t
)(copy
->offset
- offset
);
6906 zfree(vm_map_copy_zone
, copy
);
6907 return(KERN_SUCCESS
);
6911 * Check for special kernel buffer allocated
6912 * by new_ipc_kmsg_copyin.
6915 if (copy
->type
== VM_MAP_COPY_KERNEL_BUFFER
) {
6916 return(vm_map_copyout_kernel_buffer(dst_map
, dst_addr
,
6921 * Find space for the data
6924 vm_copy_start
= vm_object_trunc_page(copy
->offset
);
6925 size
= vm_map_round_page((vm_map_size_t
)copy
->offset
+ copy
->size
)
6930 vm_map_lock(dst_map
);
6931 assert(first_free_is_valid(dst_map
));
6932 start
= ((last
= dst_map
->first_free
) == vm_map_to_entry(dst_map
)) ?
6933 vm_map_min(dst_map
) : last
->vme_end
;
6936 vm_map_entry_t next
= last
->vme_next
;
6937 vm_map_offset_t end
= start
+ size
;
6939 if ((end
> dst_map
->max_offset
) || (end
< start
)) {
6940 if (dst_map
->wait_for_space
) {
6941 if (size
<= (dst_map
->max_offset
- dst_map
->min_offset
)) {
6942 assert_wait((event_t
) dst_map
,
6943 THREAD_INTERRUPTIBLE
);
6944 vm_map_unlock(dst_map
);
6945 thread_block(THREAD_CONTINUE_NULL
);
6949 vm_map_unlock(dst_map
);
6950 return(KERN_NO_SPACE
);
6953 if ((next
== vm_map_to_entry(dst_map
)) ||
6954 (next
->vme_start
>= end
))
6958 start
= last
->vme_end
;
6962 * Since we're going to just drop the map
6963 * entries from the copy into the destination
6964 * map, they must come from the same pool.
6967 if (copy
->cpy_hdr
.entries_pageable
!= dst_map
->hdr
.entries_pageable
) {
			 * Mismatches occur when dealing with the default
			 * pager.
			 */
			vm_map_entry_t	next, new;
6976 * Find the zone that the copies were allocated from
6978 old_zone
= (copy
->cpy_hdr
.entries_pageable
)
6980 : vm_map_kentry_zone
;
6981 entry
= vm_map_copy_first_entry(copy
);
6984 * Reinitialize the copy so that vm_map_copy_entry_link
6987 copy
->cpy_hdr
.nentries
= 0;
6988 copy
->cpy_hdr
.entries_pageable
= dst_map
->hdr
.entries_pageable
;
6989 vm_map_copy_first_entry(copy
) =
6990 vm_map_copy_last_entry(copy
) =
6991 vm_map_copy_to_entry(copy
);
6996 while (entry
!= vm_map_copy_to_entry(copy
)) {
6997 new = vm_map_copy_entry_create(copy
);
6998 vm_map_entry_copy_full(new, entry
);
6999 new->use_pmap
= FALSE
; /* clr address space specifics */
7000 vm_map_copy_entry_link(copy
,
7001 vm_map_copy_last_entry(copy
),
7003 next
= entry
->vme_next
;
7004 zfree(old_zone
, entry
);
7010 * Adjust the addresses in the copy chain, and
7011 * reset the region attributes.
7014 adjustment
= start
- vm_copy_start
;
7015 for (entry
= vm_map_copy_first_entry(copy
);
7016 entry
!= vm_map_copy_to_entry(copy
);
7017 entry
= entry
->vme_next
) {
7018 entry
->vme_start
+= adjustment
;
7019 entry
->vme_end
+= adjustment
;
7021 entry
->inheritance
= VM_INHERIT_DEFAULT
;
7022 entry
->protection
= VM_PROT_DEFAULT
;
7023 entry
->max_protection
= VM_PROT_ALL
;
7024 entry
->behavior
= VM_BEHAVIOR_DEFAULT
;
7027 * If the entry is now wired,
7028 * map the pages into the destination map.
7030 if (entry
->wired_count
!= 0) {
7031 register vm_map_offset_t va
;
7032 vm_object_offset_t offset
;
7033 register vm_object_t object
;
7037 object
= entry
->object
.vm_object
;
7038 offset
= entry
->offset
;
7039 va
= entry
->vme_start
;
7041 pmap_pageable(dst_map
->pmap
,
7046 while (va
< entry
->vme_end
) {
7047 register vm_page_t m
;
7050 * Look up the page in the object.
7051 * Assert that the page will be found in the
7054 * the object was newly created by
7055 * vm_object_copy_slowly, and has
7056 * copies of all of the pages from
7059 * the object was moved from the old
7060 * map entry; because the old map
7061 * entry was wired, all of the pages
7062 * were in the top-level object.
7063 * (XXX not true if we wire pages for
7066 vm_object_lock(object
);
7068 m
= vm_page_lookup(object
, offset
);
7069 if (m
== VM_PAGE_NULL
|| !VM_PAGE_WIRED(m
) ||
7071 panic("vm_map_copyout: wiring %p", m
);
7075 * The page is assumed to be wired here, so it
7076 * shouldn't be encrypted. Otherwise, we
7077 * couldn't enter it in the page table, since
7078 * we don't want the user to see the encrypted
7081 ASSERT_PAGE_DECRYPTED(m
);
7083 prot
= entry
->protection
;
7085 if (override_nx(dst_map
, entry
->alias
) && prot
)
7086 prot
|= VM_PROT_EXECUTE
;
7088 type_of_fault
= DBG_CACHE_HIT_FAULT
;
7090 vm_fault_enter(m
, dst_map
->pmap
, va
, prot
,
7091 VM_PAGE_WIRED(m
), FALSE
, FALSE
,
7094 vm_object_unlock(object
);
7096 offset
+= PAGE_SIZE_64
;
7103 * Correct the page alignment for the result
7106 *dst_addr
= start
+ (copy
->offset
- vm_copy_start
);
7109 * Update the hints and the map size
7112 SAVE_HINT_MAP_WRITE(dst_map
, vm_map_copy_last_entry(copy
));
7114 dst_map
->size
+= size
;
7120 vm_map_copy_insert(dst_map
, last
, copy
);
7122 vm_map_unlock(dst_map
);
7125 * XXX If wiring_required, call vm_map_pageable
7128 return(KERN_SUCCESS
);
/*
 *	Routine:	vm_map_copyin
 *
 *	Description:
 *		see vm_map_copyin_common.  Exported via Unsupported.exports.
 *
 */

#undef vm_map_copyin

kern_return_t
vm_map_copyin(
	vm_map_t		src_map,
	vm_map_address_t	src_addr,
	vm_map_size_t		len,
	boolean_t		src_destroy,
	vm_map_copy_t		*copy_result)	/* OUT */
{
	return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
				    FALSE, copy_result, FALSE));
}
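
/*
 * Usage sketch (hypothetical caller): a copyin/copyout round trip moves
 * a region between address spaces without overwriting existing mappings:
 *
 *	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
 *	if (kr == KERN_SUCCESS) {
 *		kr = vm_map_copyout(dst_map, &dst_addr, copy);
 *		if (kr != KERN_SUCCESS)
 *			vm_map_copy_discard(copy);
 *	}
 */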
/*
 *	Routine:	vm_map_copyin_common
 *
 *	Description:
 *		Copy the specified region (src_addr, len) from the
 *		source address space (src_map), possibly removing
 *		the region from the source address space (src_destroy).
 *
 *	Returns:
 *		A vm_map_copy_t object (copy_result), suitable for
 *		insertion into another address space (using vm_map_copyout),
 *		copying over another address space region (using
 *		vm_map_copy_overwrite).  If the copy is unused, it
 *		should be destroyed (using vm_map_copy_discard).
 *
 *	In/out conditions:
 *		The source map should not be locked on entry.
 */

typedef struct submap_map {
	vm_map_t	parent_map;
	vm_map_offset_t	base_start;
	vm_map_offset_t	base_end;
	vm_map_size_t	base_len;
	struct submap_map *next;
} submap_map_t;
7181 vm_map_copyin_common(
7183 vm_map_address_t src_addr
,
7185 boolean_t src_destroy
,
7186 __unused boolean_t src_volatile
,
7187 vm_map_copy_t
*copy_result
, /* OUT */
7188 boolean_t use_maxprot
)
7190 vm_map_entry_t tmp_entry
; /* Result of last map lookup --
7191 * in multi-level lookup, this
7192 * entry contains the actual
7196 vm_map_entry_t new_entry
= VM_MAP_ENTRY_NULL
; /* Map entry for copy */
7198 vm_map_offset_t src_start
; /* Start of current entry --
7199 * where copy is taking place now
7201 vm_map_offset_t src_end
; /* End of entire region to be
7203 vm_map_offset_t src_base
;
7204 vm_map_t base_map
= src_map
;
7205 boolean_t map_share
=FALSE
;
7206 submap_map_t
*parent_maps
= NULL
;
7209 vm_map_copy_t copy
; /* Resulting copy */
7210 vm_map_address_t copy_addr
;
7213 * Check for copies of zero bytes.
7217 *copy_result
= VM_MAP_COPY_NULL
;
7218 return(KERN_SUCCESS
);
7222 * Check that the end address doesn't overflow
7224 src_end
= src_addr
+ len
;
7225 if (src_end
< src_addr
)
7226 return KERN_INVALID_ADDRESS
;
7229 * If the copy is sufficiently small, use a kernel buffer instead
7230 * of making a virtual copy. The theory being that the cost of
7231 * setting up VM (and taking C-O-W faults) dominates the copy costs
7232 * for small regions.
7234 if ((len
< msg_ool_size_small
) && !use_maxprot
)
7235 return vm_map_copyin_kernel_buffer(src_map
, src_addr
, len
,
7236 src_destroy
, copy_result
);
7239 * Compute (page aligned) start and end of region
7241 src_start
= vm_map_trunc_page(src_addr
);
7242 src_end
= vm_map_round_page(src_end
);
7244 XPR(XPR_VM_MAP
, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map
, src_addr
, len
, src_destroy
, 0);
7247 * Allocate a header element for the list.
7249 * Use the start and end in the header to
7250 * remember the endpoints prior to rounding.
7253 copy
= (vm_map_copy_t
) zalloc(vm_map_copy_zone
);
7254 vm_map_copy_first_entry(copy
) =
7255 vm_map_copy_last_entry(copy
) = vm_map_copy_to_entry(copy
);
7256 copy
->type
= VM_MAP_COPY_ENTRY_LIST
;
7257 copy
->cpy_hdr
.nentries
= 0;
7258 copy
->cpy_hdr
.entries_pageable
= TRUE
;
7260 copy
->offset
= src_addr
;
7263 new_entry
= vm_map_copy_entry_create(copy
);
7267 vm_map_unlock(src_map); \
7268 if(src_map != base_map) \
7269 vm_map_deallocate(src_map); \
7270 if (new_entry != VM_MAP_ENTRY_NULL) \
7271 vm_map_copy_entry_dispose(copy,new_entry); \
7272 vm_map_copy_discard(copy); \
7274 submap_map_t *_ptr; \
7276 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
7277 parent_maps=parent_maps->next; \
7278 if (_ptr->parent_map != base_map) \
7279 vm_map_deallocate(_ptr->parent_map); \
7280 kfree(_ptr, sizeof(submap_map_t)); \
7287 * Find the beginning of the region.
7290 vm_map_lock(src_map
);
7292 if (!vm_map_lookup_entry(src_map
, src_start
, &tmp_entry
))
7293 RETURN(KERN_INVALID_ADDRESS
);
7294 if(!tmp_entry
->is_sub_map
) {
7295 vm_map_clip_start(src_map
, tmp_entry
, src_start
);
7297 /* set for later submap fix-up */
7298 copy_addr
= src_start
;
7301 * Go through entries until we get to the end.
7306 vm_map_entry_t src_entry
= tmp_entry
; /* Top-level entry */
7307 vm_map_size_t src_size
; /* Size of source
7308 * map entry (in both
7313 vm_object_t src_object
; /* Object to copy */
7314 vm_object_offset_t src_offset
;
7316 boolean_t src_needs_copy
; /* Should source map
7318 * for copy-on-write?
7321 boolean_t new_entry_needs_copy
; /* Will new entry be COW? */
7323 boolean_t was_wired
; /* Was source wired? */
7324 vm_map_version_t version
; /* Version before locks
7325 * dropped to make copy
7327 kern_return_t result
; /* Return value from
7328 * copy_strategically.
7330 while(tmp_entry
->is_sub_map
) {
7331 vm_map_size_t submap_len
;
7334 ptr
= (submap_map_t
*)kalloc(sizeof(submap_map_t
));
7335 ptr
->next
= parent_maps
;
7337 ptr
->parent_map
= src_map
;
7338 ptr
->base_start
= src_start
;
7339 ptr
->base_end
= src_end
;
7340 submap_len
= tmp_entry
->vme_end
- src_start
;
7341 if(submap_len
> (src_end
-src_start
))
7342 submap_len
= src_end
-src_start
;
7343 ptr
->base_len
= submap_len
;
7345 src_start
-= tmp_entry
->vme_start
;
7346 src_start
+= tmp_entry
->offset
;
7347 src_end
= src_start
+ submap_len
;
7348 src_map
= tmp_entry
->object
.sub_map
;
7349 vm_map_lock(src_map
);
7350 /* keep an outstanding reference for all maps in */
7351 /* the parents tree except the base map */
7352 vm_map_reference(src_map
);
7353 vm_map_unlock(ptr
->parent_map
);
7354 if (!vm_map_lookup_entry(
7355 src_map
, src_start
, &tmp_entry
))
7356 RETURN(KERN_INVALID_ADDRESS
);
7358 if(!tmp_entry
->is_sub_map
)
7359 vm_map_clip_start(src_map
, tmp_entry
, src_start
);
7360 src_entry
= tmp_entry
;
7362 /* we are now in the lowest level submap... */
7364 if ((tmp_entry
->object
.vm_object
!= VM_OBJECT_NULL
) &&
7365 (tmp_entry
->object
.vm_object
->phys_contiguous
)) {
7366 /* This is not, supported for now.In future */
7367 /* we will need to detect the phys_contig */
7368 /* condition and then upgrade copy_slowly */
7369 /* to do physical copy from the device mem */
7370 /* based object. We can piggy-back off of */
7371 /* the was wired boolean to set-up the */
7372 /* proper handling */
7373 RETURN(KERN_PROTECTION_FAILURE
);
7376 * Create a new address map entry to hold the result.
7377 * Fill in the fields from the appropriate source entries.
7378 * We must unlock the source map to do this if we need
7379 * to allocate a map entry.
7381 if (new_entry
== VM_MAP_ENTRY_NULL
) {
7382 version
.main_timestamp
= src_map
->timestamp
;
7383 vm_map_unlock(src_map
);
7385 new_entry
= vm_map_copy_entry_create(copy
);
7387 vm_map_lock(src_map
);
7388 if ((version
.main_timestamp
+ 1) != src_map
->timestamp
) {
7389 if (!vm_map_lookup_entry(src_map
, src_start
,
7391 RETURN(KERN_INVALID_ADDRESS
);
7393 if (!tmp_entry
->is_sub_map
)
7394 vm_map_clip_start(src_map
, tmp_entry
, src_start
);
7395 continue; /* restart w/ new tmp_entry */
7400 * Verify that the region can be read.
7402 if (((src_entry
->protection
& VM_PROT_READ
) == VM_PROT_NONE
&&
7404 (src_entry
->max_protection
& VM_PROT_READ
) == 0)
7405 RETURN(KERN_PROTECTION_FAILURE
);
7408 * Clip against the endpoints of the entire region.
7411 vm_map_clip_end(src_map
, src_entry
, src_end
);
7413 src_size
= src_entry
->vme_end
- src_start
;
7414 src_object
= src_entry
->object
.vm_object
;
7415 src_offset
= src_entry
->offset
;
7416 was_wired
= (src_entry
->wired_count
!= 0);
7418 vm_map_entry_copy(new_entry
, src_entry
);
7419 new_entry
->use_pmap
= FALSE
; /* clr address space specifics */
7422 * Attempt non-blocking copy-on-write optimizations.
7426 (src_object
== VM_OBJECT_NULL
||
7427 (src_object
->internal
&& !src_object
->true_share
7430 * If we are destroying the source, and the object
7431 * is internal, we can move the object reference
7432 * from the source to the copy. The copy is
7433 * copy-on-write only if the source is.
7434 * We make another reference to the object, because
7435 * destroying the source entry will deallocate it.
7437 vm_object_reference(src_object
);
7440 * Copy is always unwired. vm_map_copy_entry
7441 * set its wired count to zero.
7444 goto CopySuccessful
;
7449 XPR(XPR_VM_MAP
, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
7450 src_object
, new_entry
, new_entry
->object
.vm_object
,
7452 if ((src_object
== VM_OBJECT_NULL
||
7453 (!was_wired
&& !map_share
&& !tmp_entry
->is_shared
)) &&
7454 vm_object_copy_quickly(
7455 &new_entry
->object
.vm_object
,
7459 &new_entry_needs_copy
)) {
7461 new_entry
->needs_copy
= new_entry_needs_copy
;
7464 * Handle copy-on-write obligations
7467 if (src_needs_copy
&& !tmp_entry
->needs_copy
) {
7470 prot
= src_entry
->protection
& ~VM_PROT_WRITE
;
7472 if (override_nx(src_map
, src_entry
->alias
) && prot
)
7473 prot
|= VM_PROT_EXECUTE
;
7475 vm_object_pmap_protect(
7479 (src_entry
->is_shared
?
7482 src_entry
->vme_start
,
7485 tmp_entry
->needs_copy
= TRUE
;
7489 * The map has never been unlocked, so it's safe
7490 * to move to the next entry rather than doing
7494 goto CopySuccessful
;
7498 * Take an object reference, so that we may
7499 * release the map lock(s).
7502 assert(src_object
!= VM_OBJECT_NULL
);
7503 vm_object_reference(src_object
);
7506 * Record the timestamp for later verification.
7510 version
.main_timestamp
= src_map
->timestamp
;
7511 vm_map_unlock(src_map
); /* Increments timestamp once! */
7519 vm_object_lock(src_object
);
7520 result
= vm_object_copy_slowly(
7525 &new_entry
->object
.vm_object
);
7526 new_entry
->offset
= 0;
7527 new_entry
->needs_copy
= FALSE
;
7530 else if (src_object
->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
&&
7531 (tmp_entry
->is_shared
|| map_share
)) {
7532 vm_object_t new_object
;
7534 vm_object_lock_shared(src_object
);
7535 new_object
= vm_object_copy_delayed(
7540 if (new_object
== VM_OBJECT_NULL
)
7543 new_entry
->object
.vm_object
= new_object
;
7544 new_entry
->needs_copy
= TRUE
;
7545 result
= KERN_SUCCESS
;
7548 result
= vm_object_copy_strategically(src_object
,
7551 &new_entry
->object
.vm_object
,
7553 &new_entry_needs_copy
);
7555 new_entry
->needs_copy
= new_entry_needs_copy
;
7558 if (result
!= KERN_SUCCESS
&&
7559 result
!= KERN_MEMORY_RESTART_COPY
) {
7560 vm_map_lock(src_map
);
7565 * Throw away the extra reference
7568 vm_object_deallocate(src_object
);
7571 * Verify that the map has not substantially
7572 * changed while the copy was being made.
7575 vm_map_lock(src_map
);
7577 if ((version
.main_timestamp
+ 1) == src_map
->timestamp
)
7578 goto VerificationSuccessful
;
7581 * Simple version comparison failed.
7583 * Retry the lookup and verify that the
7584 * same object/offset are still present.
7586 * [Note: a memory manager that colludes with
7587 * the calling task can detect that we have
7588 * cheated. While the map was unlocked, the
7589 * mapping could have been changed and restored.]
7592 if (!vm_map_lookup_entry(src_map
, src_start
, &tmp_entry
)) {
7593 RETURN(KERN_INVALID_ADDRESS
);
7596 src_entry
= tmp_entry
;
7597 vm_map_clip_start(src_map
, src_entry
, src_start
);
7599 if ((((src_entry
->protection
& VM_PROT_READ
) == VM_PROT_NONE
) &&
7601 ((src_entry
->max_protection
& VM_PROT_READ
) == 0))
7602 goto VerificationFailed
;
7604 if (src_entry
->vme_end
< new_entry
->vme_end
)
7605 src_size
= (new_entry
->vme_end
= src_entry
->vme_end
) - src_start
;
7607 if ((src_entry
->object
.vm_object
!= src_object
) ||
7608 (src_entry
->offset
!= src_offset
) ) {
7611 * Verification failed.
7613 * Start over with this top-level entry.
7616 VerificationFailed
: ;
7618 vm_object_deallocate(new_entry
->object
.vm_object
);
7619 tmp_entry
= src_entry
;
7624 * Verification succeeded.
7627 VerificationSuccessful
: ;
7629 if (result
== KERN_MEMORY_RESTART_COPY
)
7639 * Link in the new copy entry.
7642 vm_map_copy_entry_link(copy
, vm_map_copy_last_entry(copy
),
7646 * Determine whether the entire region
7649 src_base
= src_start
;
7650 src_start
= new_entry
->vme_end
;
7651 new_entry
= VM_MAP_ENTRY_NULL
;
7652 while ((src_start
>= src_end
) && (src_end
!= 0)) {
7653 if (src_map
!= base_map
) {
7657 assert(ptr
!= NULL
);
7658 parent_maps
= parent_maps
->next
;
7660 /* fix up the damage we did in that submap */
7661 vm_map_simplify_range(src_map
,
7665 vm_map_unlock(src_map
);
7666 vm_map_deallocate(src_map
);
7667 vm_map_lock(ptr
->parent_map
);
7668 src_map
= ptr
->parent_map
;
7669 src_base
= ptr
->base_start
;
7670 src_start
= ptr
->base_start
+ ptr
->base_len
;
7671 src_end
= ptr
->base_end
;
7672 if ((src_end
> src_start
) &&
7673 !vm_map_lookup_entry(
7674 src_map
, src_start
, &tmp_entry
))
7675 RETURN(KERN_INVALID_ADDRESS
);
7676 kfree(ptr
, sizeof(submap_map_t
));
7677 if(parent_maps
== NULL
)
7679 src_entry
= tmp_entry
->vme_prev
;
7683 if ((src_start
>= src_end
) && (src_end
!= 0))
7687 * Verify that there are no gaps in the region
7690 tmp_entry
= src_entry
->vme_next
;
7691 if ((tmp_entry
->vme_start
!= src_start
) ||
7692 (tmp_entry
== vm_map_to_entry(src_map
)))
7693 RETURN(KERN_INVALID_ADDRESS
);
7697 * If the source should be destroyed, do it now, since the
7698 * copy was successful.
7701 (void) vm_map_delete(src_map
,
7702 vm_map_trunc_page(src_addr
),
7704 (src_map
== kernel_map
) ?
7705 VM_MAP_REMOVE_KUNWIRE
:
7709 /* fix up the damage we did in the base map */
7710 vm_map_simplify_range(src_map
,
7711 vm_map_trunc_page(src_addr
),
7712 vm_map_round_page(src_end
));
7715 vm_map_unlock(src_map
);
7717 /* Fix-up start and end points in copy. This is necessary */
7718 /* when the various entries in the copy object were picked */
7719 /* up from different sub-maps */
7721 tmp_entry
= vm_map_copy_first_entry(copy
);
7722 while (tmp_entry
!= vm_map_copy_to_entry(copy
)) {
7723 tmp_entry
->vme_end
= copy_addr
+
7724 (tmp_entry
->vme_end
- tmp_entry
->vme_start
);
7725 tmp_entry
->vme_start
= copy_addr
;
7726 copy_addr
+= tmp_entry
->vme_end
- tmp_entry
->vme_start
;
7727 tmp_entry
= (struct vm_map_entry
*)tmp_entry
->vme_next
;
7730 *copy_result
= copy
;
7731 return(KERN_SUCCESS
);
7737 * vm_map_copyin_object:
7739 * Create a copy object from an object.
7740 * Our caller donates an object reference.
7744 vm_map_copyin_object(
7746 vm_object_offset_t offset
, /* offset of region in object */
7747 vm_object_size_t size
, /* size of region in object */
7748 vm_map_copy_t
*copy_result
) /* OUT */
7750 vm_map_copy_t copy
; /* Resulting copy */
7753 * We drop the object into a special copy object
7754 * that contains the object directly.
7757 copy
= (vm_map_copy_t
) zalloc(vm_map_copy_zone
);
7758 copy
->type
= VM_MAP_COPY_OBJECT
;
7759 copy
->cpy_object
= object
;
7760 copy
->offset
= offset
;
7763 *copy_result
= copy
;
7764 return(KERN_SUCCESS
);
7770 vm_map_entry_t old_entry
,
7774 vm_map_entry_t new_entry
;
7777 * New sharing code. New map entry
7778 * references original object. Internal
7779 * objects use asynchronous copy algorithm for
7780 * future copies. First make sure we have
7781 * the right object. If we need a shadow,
7782 * or someone else already has one, then
7783 * make a new shadow and share it.
7786 object
= old_entry
->object
.vm_object
;
7787 if (old_entry
->is_sub_map
) {
7788 assert(old_entry
->wired_count
== 0);
7789 #ifndef NO_NESTED_PMAP
7790 if(old_entry
->use_pmap
) {
7791 kern_return_t result
;
7793 result
= pmap_nest(new_map
->pmap
,
7794 (old_entry
->object
.sub_map
)->pmap
,
7795 (addr64_t
)old_entry
->vme_start
,
7796 (addr64_t
)old_entry
->vme_start
,
7797 (uint64_t)(old_entry
->vme_end
- old_entry
->vme_start
));
7799 panic("vm_map_fork_share: pmap_nest failed!");
7801 #endif /* NO_NESTED_PMAP */
7802 } else if (object
== VM_OBJECT_NULL
) {
7803 object
= vm_object_allocate((vm_map_size_t
)(old_entry
->vme_end
-
7804 old_entry
->vme_start
));
7805 old_entry
->offset
= 0;
7806 old_entry
->object
.vm_object
= object
;
7807 assert(!old_entry
->needs_copy
);
7808 } else if (object
->copy_strategy
!=
7809 MEMORY_OBJECT_COPY_SYMMETRIC
) {
7812 * We are already using an asymmetric
7813 * copy, and therefore we already have
7817 assert(! old_entry
->needs_copy
);
7819 else if (old_entry
->needs_copy
|| /* case 1 */
7820 object
->shadowed
|| /* case 2 */
7821 (!object
->true_share
&& /* case 3 */
7822 !old_entry
->is_shared
&&
7824 (vm_map_size_t
)(old_entry
->vme_end
-
7825 old_entry
->vme_start
)))) {
7828 * We need to create a shadow.
7829 * There are three cases here.
7830 * In the first case, we need to
7831 * complete a deferred symmetrical
7832 * copy that we participated in.
7833 * In the second and third cases,
7834 * we need to create the shadow so
7835 * that changes that we make to the
7836 * object do not interfere with
7837 * any symmetrical copies which
7838 * have occured (case 2) or which
7839 * might occur (case 3).
7841 * The first case is when we had
7842 * deferred shadow object creation
7843 * via the entry->needs_copy mechanism.
7844 * This mechanism only works when
7845 * only one entry points to the source
7846 * object, and we are about to create
7847 * a second entry pointing to the
7848 * same object. The problem is that
7849 * there is no way of mapping from
7850 * an object to the entries pointing
7851 * to it. (Deferred shadow creation
7852 * works with one entry because occurs
7853 * at fault time, and we walk from the
7854 * entry to the object when handling
7857 * The second case is when the object
7858 * to be shared has already been copied
7859 * with a symmetric copy, but we point
7860 * directly to the object without
7861 * needs_copy set in our entry. (This
7862 * can happen because different ranges
7863 * of an object can be pointed to by
7864 * different entries. In particular,
7865 * a single entry pointing to an object
7866 * can be split by a call to vm_inherit,
7867 * which, combined with task_create, can
7868 * result in the different entries
7869 * having different needs_copy values.)
7870 * The shadowed flag in the object allows
7871 * us to detect this case. The problem
7872 * with this case is that if this object
7873 * has or will have shadows, then we
7874 * must not perform an asymmetric copy
7875 * of this object, since such a copy
7876 * allows the object to be changed, which
7877 * will break the previous symmetrical
7878 * copies (which rely upon the object
7879 * not changing). In a sense, the shadowed
7880 * flag says "don't change this object".
7881 * We fix this by creating a shadow
7882 * object for this object, and sharing
7883 * that. This works because we are free
7884 * to change the shadow object (and thus
7885 * to use an asymmetric copy strategy);
7886 * this is also semantically correct,
7887 * since this object is temporary, and
7888 * therefore a copy of the object is
7889 * as good as the object itself. (This
7890 * is not true for permanent objects,
7891 * since the pager needs to see changes,
7892 * which won't happen if the changes
7893 * are made to a copy.)
7895 * The third case is when the object
7896 * to be shared has parts sticking
7897 * outside of the entry we're working
7898 * with, and thus may in the future
7899 * be subject to a symmetrical copy.
7900 * (This is a preemptive version of
7904 vm_object_shadow(&old_entry
->object
.vm_object
,
7906 (vm_map_size_t
) (old_entry
->vme_end
-
7907 old_entry
->vme_start
));
7910 * If we're making a shadow for other than
7911 * copy on write reasons, then we have
7912 * to remove write permission.
7915 if (!old_entry
->needs_copy
&&
7916 (old_entry
->protection
& VM_PROT_WRITE
)) {
7919 prot
= old_entry
->protection
& ~VM_PROT_WRITE
;
7921 if (override_nx(old_map
, old_entry
->alias
) && prot
)
7922 prot
|= VM_PROT_EXECUTE
;
7924 if (old_map
->mapped
) {
7925 vm_object_pmap_protect(
7926 old_entry
->object
.vm_object
,
7928 (old_entry
->vme_end
-
7929 old_entry
->vme_start
),
7931 old_entry
->vme_start
,
7934 pmap_protect(old_map
->pmap
,
7935 old_entry
->vme_start
,
7941 old_entry
->needs_copy
= FALSE
;
7942 object
= old_entry
->object
.vm_object
;
7946 * If object was using a symmetric copy strategy,
7947 * change its copy strategy to the default
7948 * asymmetric copy strategy, which is copy_delay
7949 * in the non-norma case and copy_call in the
7950 * norma case. Bump the reference count for the
7954 if(old_entry
->is_sub_map
) {
7955 vm_map_lock(old_entry
->object
.sub_map
);
7956 vm_map_reference(old_entry
->object
.sub_map
);
7957 vm_map_unlock(old_entry
->object
.sub_map
);
7959 vm_object_lock(object
);
7960 vm_object_reference_locked(object
);
7961 if (object
->copy_strategy
== MEMORY_OBJECT_COPY_SYMMETRIC
) {
7962 object
->copy_strategy
= MEMORY_OBJECT_COPY_DELAY
;
7964 vm_object_unlock(object
);
7968 * Clone the entry, using object ref from above.
7969 * Mark both entries as shared.
7972 new_entry
= vm_map_entry_create(new_map
);
7973 vm_map_entry_copy(new_entry
, old_entry
);
7974 old_entry
->is_shared
= TRUE
;
7975 new_entry
->is_shared
= TRUE
;
7978 * Insert the entry into the new map -- we
7979 * know we're inserting at the end of the new
7983 vm_map_entry_link(new_map
, vm_map_last_entry(new_map
), new_entry
);
7986 * Update the physical map
7989 if (old_entry
->is_sub_map
) {
7990 /* Bill Angell pmap support goes here */
7992 pmap_copy(new_map
->pmap
, old_map
->pmap
, new_entry
->vme_start
,
7993 old_entry
->vme_end
- old_entry
->vme_start
,
7994 old_entry
->vme_start
);
8001 vm_map_entry_t
*old_entry_p
,
8004 vm_map_entry_t old_entry
= *old_entry_p
;
8005 vm_map_size_t entry_size
= old_entry
->vme_end
- old_entry
->vme_start
;
8006 vm_map_offset_t start
= old_entry
->vme_start
;
8008 vm_map_entry_t last
= vm_map_last_entry(new_map
);
8010 vm_map_unlock(old_map
);
8012 * Use maxprot version of copyin because we
8013 * care about whether this memory can ever
8014 * be accessed, not just whether it's accessible
8017 if (vm_map_copyin_maxprot(old_map
, start
, entry_size
, FALSE
, ©
)
8020 * The map might have changed while it
8021 * was unlocked, check it again. Skip
8022 * any blank space or permanently
8023 * unreadable region.
8025 vm_map_lock(old_map
);
8026 if (!vm_map_lookup_entry(old_map
, start
, &last
) ||
8027 (last
->max_protection
& VM_PROT_READ
) == VM_PROT_NONE
) {
8028 last
= last
->vme_next
;
8030 *old_entry_p
= last
;
8033 * XXX For some error returns, want to
8034 * XXX skip to the next element. Note
8035 * that INVALID_ADDRESS and
8036 * PROTECTION_FAILURE are handled above.
8043 * Insert the copy into the new map
8046 vm_map_copy_insert(new_map
, last
, copy
);
8049 * Pick up the traversal at the end of
8050 * the copied region.
8053 vm_map_lock(old_map
);
8054 start
+= entry_size
;
8055 if (! vm_map_lookup_entry(old_map
, start
, &last
)) {
8056 last
= last
->vme_next
;
8058 if (last
->vme_start
== start
) {
8060 * No need to clip here and we don't
8061 * want to cause any unnecessary
8065 vm_map_clip_start(old_map
, last
, start
);
8068 *old_entry_p
= last
;
8076 * Create and return a new map based on the old
8077 * map, according to the inheritance values on the
8078 * regions in that map.
8080 * The source map must not be locked.
8088 vm_map_entry_t old_entry
;
8089 vm_map_size_t new_size
= 0, entry_size
;
8090 vm_map_entry_t new_entry
;
8091 boolean_t src_needs_copy
;
8092 boolean_t new_entry_needs_copy
;
8094 new_pmap
= pmap_create((vm_map_size_t
) 0,
8095 #if defined(__i386__) || defined(__x86_64__)
8096 old_map
->pmap
->pm_task_map
!= TASK_MAP_32BIT
8101 #if defined(__i386__)
8102 if (old_map
->pmap
->pm_task_map
== TASK_MAP_64BIT_SHARED
)
8103 pmap_set_4GB_pagezero(new_pmap
);
8106 vm_map_reference_swap(old_map
);
8107 vm_map_lock(old_map
);
8109 new_map
= vm_map_create(new_pmap
,
8110 old_map
->min_offset
,
8111 old_map
->max_offset
,
8112 old_map
->hdr
.entries_pageable
);
8115 old_entry
= vm_map_first_entry(old_map
);
8116 old_entry
!= vm_map_to_entry(old_map
);
8119 entry_size
= old_entry
->vme_end
- old_entry
->vme_start
;
8121 switch (old_entry
->inheritance
) {
8122 case VM_INHERIT_NONE
:
8125 case VM_INHERIT_SHARE
:
8126 vm_map_fork_share(old_map
, old_entry
, new_map
);
8127 new_size
+= entry_size
;
8130 case VM_INHERIT_COPY
:
8133 * Inline the copy_quickly case;
8134 * upon failure, fall back on call
8135 * to vm_map_fork_copy.
8138 if(old_entry
->is_sub_map
)
8140 if ((old_entry
->wired_count
!= 0) ||
8141 ((old_entry
->object
.vm_object
!= NULL
) &&
8142 (old_entry
->object
.vm_object
->true_share
))) {
8143 goto slow_vm_map_fork_copy
;
8146 new_entry
= vm_map_entry_create(new_map
);
8147 vm_map_entry_copy(new_entry
, old_entry
);
8148 /* clear address space specifics */
8149 new_entry
->use_pmap
= FALSE
;
8151 if (! vm_object_copy_quickly(
8152 &new_entry
->object
.vm_object
,
8154 (old_entry
->vme_end
-
8155 old_entry
->vme_start
),
8157 &new_entry_needs_copy
)) {
8158 vm_map_entry_dispose(new_map
, new_entry
);
8159 goto slow_vm_map_fork_copy
;
8163 * Handle copy-on-write obligations
8166 if (src_needs_copy
&& !old_entry
->needs_copy
) {
8169 prot
= old_entry
->protection
& ~VM_PROT_WRITE
;
8171 if (override_nx(old_map
, old_entry
->alias
) && prot
)
8172 prot
|= VM_PROT_EXECUTE
;
8174 vm_object_pmap_protect(
8175 old_entry
->object
.vm_object
,
8177 (old_entry
->vme_end
-
8178 old_entry
->vme_start
),
8179 ((old_entry
->is_shared
8183 old_entry
->vme_start
,
8186 old_entry
->needs_copy
= TRUE
;
8188 new_entry
->needs_copy
= new_entry_needs_copy
;
8191 * Insert the entry at the end
8195 vm_map_entry_link(new_map
, vm_map_last_entry(new_map
),
8197 new_size
+= entry_size
;
8200 slow_vm_map_fork_copy
:
8201 if (vm_map_fork_copy(old_map
, &old_entry
, new_map
)) {
8202 new_size
+= entry_size
;
8206 old_entry
= old_entry
->vme_next
;
8209 new_map
->size
= new_size
;
8210 vm_map_unlock(old_map
);
8211 vm_map_deallocate(old_map
);
8219 * Setup the "new_map" with the proper execution environment according
8220 * to the type of executable (platform, 64bit, chroot environment).
8221 * Map the comm page and shared region, etc...
8230 SHARED_REGION_TRACE_DEBUG(
8231 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
8232 current_task(), new_map
, task
, fsroot
, cpu
));
8233 (void) vm_commpage_enter(new_map
, task
);
8234 (void) vm_shared_region_enter(new_map
, task
, fsroot
, cpu
);
8235 SHARED_REGION_TRACE_DEBUG(
8236 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
8237 current_task(), new_map
, task
, fsroot
, cpu
));
8238 return KERN_SUCCESS
;
8242 * vm_map_lookup_locked:
8244 * Finds the VM object, offset, and
8245 * protection for a given virtual address in the
8246 * specified map, assuming a page fault of the
8249 * Returns the (object, offset, protection) for
8250 * this address, whether it is wired down, and whether
8251 * this map has the only reference to the data in question.
8252 * In order to later verify this lookup, a "version"
8255 * The map MUST be locked by the caller and WILL be
8256 * locked on exit. In order to guarantee the
8257 * existence of the returned object, it is returned
8260 * If a lookup is requested with "write protection"
8261 * specified, the map may be changed to perform virtual
8262 * copying operations, although the data referenced will
8266 vm_map_lookup_locked(
8267 vm_map_t
*var_map
, /* IN/OUT */
8268 vm_map_offset_t vaddr
,
8269 vm_prot_t fault_type
,
8270 int object_lock_type
,
8271 vm_map_version_t
*out_version
, /* OUT */
8272 vm_object_t
*object
, /* OUT */
8273 vm_object_offset_t
*offset
, /* OUT */
8274 vm_prot_t
*out_prot
, /* OUT */
8275 boolean_t
*wired
, /* OUT */
8276 vm_object_fault_info_t fault_info
, /* OUT */
8279 vm_map_entry_t entry
;
8280 register vm_map_t map
= *var_map
;
8281 vm_map_t old_map
= *var_map
;
8282 vm_map_t cow_sub_map_parent
= VM_MAP_NULL
;
8283 vm_map_offset_t cow_parent_vaddr
= 0;
8284 vm_map_offset_t old_start
= 0;
8285 vm_map_offset_t old_end
= 0;
8286 register vm_prot_t prot
;
8292 * If the map has an interesting hint, try it before calling
8293 * full blown lookup routine.
8297 if ((entry
== vm_map_to_entry(map
)) ||
8298 (vaddr
< entry
->vme_start
) || (vaddr
>= entry
->vme_end
)) {
8299 vm_map_entry_t tmp_entry
;
8302 * Entry was either not a valid hint, or the vaddr
8303 * was not contained in the entry, so do a full lookup.
8305 if (!vm_map_lookup_entry(map
, vaddr
, &tmp_entry
)) {
8306 if((cow_sub_map_parent
) && (cow_sub_map_parent
!= map
))
8307 vm_map_unlock(cow_sub_map_parent
);
8308 if((*real_map
!= map
)
8309 && (*real_map
!= cow_sub_map_parent
))
8310 vm_map_unlock(*real_map
);
8311 return KERN_INVALID_ADDRESS
;
8316 if(map
== old_map
) {
8317 old_start
= entry
->vme_start
;
8318 old_end
= entry
->vme_end
;
8322 * Handle submaps. Drop lock on upper map, submap is
8327 if (entry
->is_sub_map
) {
8328 vm_map_offset_t local_vaddr
;
8329 vm_map_offset_t end_delta
;
8330 vm_map_offset_t start_delta
;
8331 vm_map_entry_t submap_entry
;
8332 boolean_t mapped_needs_copy
=FALSE
;
8334 local_vaddr
= vaddr
;
8336 if ((entry
->use_pmap
&& !(fault_type
& VM_PROT_WRITE
))) {
8337 /* if real_map equals map we unlock below */
8338 if ((*real_map
!= map
) &&
8339 (*real_map
!= cow_sub_map_parent
))
8340 vm_map_unlock(*real_map
);
8341 *real_map
= entry
->object
.sub_map
;
8344 if(entry
->needs_copy
&& (fault_type
& VM_PROT_WRITE
)) {
8345 if (!mapped_needs_copy
) {
8346 if (vm_map_lock_read_to_write(map
)) {
8347 vm_map_lock_read(map
);
8348 /* XXX FBDP: entry still valid ? */
8349 if(*real_map
== entry
->object
.sub_map
)
8353 vm_map_lock_read(entry
->object
.sub_map
);
8354 cow_sub_map_parent
= map
;
8355 /* reset base to map before cow object */
8356 /* this is the map which will accept */
8357 /* the new cow object */
8358 old_start
= entry
->vme_start
;
8359 old_end
= entry
->vme_end
;
8360 cow_parent_vaddr
= vaddr
;
8361 mapped_needs_copy
= TRUE
;
8363 vm_map_lock_read(entry
->object
.sub_map
);
8364 if((cow_sub_map_parent
!= map
) &&
8369 vm_map_lock_read(entry
->object
.sub_map
);
8370 /* leave map locked if it is a target */
8371 /* cow sub_map above otherwise, just */
8372 /* follow the maps down to the object */
8373 /* here we unlock knowing we are not */
8374 /* revisiting the map. */
8375 if((*real_map
!= map
) && (map
!= cow_sub_map_parent
))
8376 vm_map_unlock_read(map
);
8379 /* XXX FBDP: map has been unlocked, what protects "entry" !? */
8380 *var_map
= map
= entry
->object
.sub_map
;
8382 /* calculate the offset in the submap for vaddr */
8383 local_vaddr
= (local_vaddr
- entry
->vme_start
) + entry
->offset
;
8386 if(!vm_map_lookup_entry(map
, local_vaddr
, &submap_entry
)) {
8387 if((cow_sub_map_parent
) && (cow_sub_map_parent
!= map
)){
8388 vm_map_unlock(cow_sub_map_parent
);
8390 if((*real_map
!= map
)
8391 && (*real_map
!= cow_sub_map_parent
)) {
8392 vm_map_unlock(*real_map
);
8395 return KERN_INVALID_ADDRESS
;
8398 /* find the attenuated shadow of the underlying object */
8399 /* on our target map */
8401 /* in english the submap object may extend beyond the */
8402 /* region mapped by the entry or, may only fill a portion */
8403 /* of it. For our purposes, we only care if the object */
8404 /* doesn't fill. In this case the area which will */
8405 /* ultimately be clipped in the top map will only need */
8406 /* to be as big as the portion of the underlying entry */
8407 /* which is mapped */
8408 start_delta
= submap_entry
->vme_start
> entry
->offset
?
8409 submap_entry
->vme_start
- entry
->offset
: 0;
8412 (entry
->offset
+ start_delta
+ (old_end
- old_start
)) <=
8413 submap_entry
->vme_end
?
8414 0 : (entry
->offset
+
8415 (old_end
- old_start
))
8416 - submap_entry
->vme_end
;
8418 old_start
+= start_delta
;
8419 old_end
-= end_delta
;
8421 if(submap_entry
->is_sub_map
) {
8422 entry
= submap_entry
;
8423 vaddr
= local_vaddr
;
8424 goto submap_recurse
;
8427 if(((fault_type
& VM_PROT_WRITE
) && cow_sub_map_parent
)) {
8429 vm_object_t sub_object
, copy_object
;
8430 vm_object_offset_t copy_offset
;
8431 vm_map_offset_t local_start
;
8432 vm_map_offset_t local_end
;
8433 boolean_t copied_slowly
= FALSE
;
8435 if (vm_map_lock_read_to_write(map
)) {
8436 vm_map_lock_read(map
);
8437 old_start
-= start_delta
;
8438 old_end
+= end_delta
;
8443 sub_object
= submap_entry
->object
.vm_object
;
8444 if (sub_object
== VM_OBJECT_NULL
) {
8448 (submap_entry
->vme_end
-
8449 submap_entry
->vme_start
));
8450 submap_entry
->object
.vm_object
= sub_object
;
8451 submap_entry
->offset
= 0;
8453 local_start
= local_vaddr
-
8454 (cow_parent_vaddr
- old_start
);
8455 local_end
= local_vaddr
+
8456 (old_end
- cow_parent_vaddr
);
8457 vm_map_clip_start(map
, submap_entry
, local_start
);
8458 vm_map_clip_end(map
, submap_entry
, local_end
);
8459 /* unnesting was done in vm_map_clip_start/end() */
8460 assert(!submap_entry
->use_pmap
);
8462 /* This is the COW case, lets connect */
8463 /* an entry in our space to the underlying */
8464 /* object in the submap, bypassing the */
8468 if(submap_entry
->wired_count
!= 0 ||
8469 (sub_object
->copy_strategy
==
8470 MEMORY_OBJECT_COPY_NONE
)) {
8471 vm_object_lock(sub_object
);
8472 vm_object_copy_slowly(sub_object
,
8473 submap_entry
->offset
,
8474 (submap_entry
->vme_end
-
8475 submap_entry
->vme_start
),
8478 copied_slowly
= TRUE
;
8481 /* set up shadow object */
8482 copy_object
= sub_object
;
8483 vm_object_reference(copy_object
);
8484 sub_object
->shadowed
= TRUE
;
8485 submap_entry
->needs_copy
= TRUE
;
8487 prot
= submap_entry
->protection
& ~VM_PROT_WRITE
;
8489 if (override_nx(map
, submap_entry
->alias
) && prot
)
8490 prot
|= VM_PROT_EXECUTE
;
8492 vm_object_pmap_protect(
8494 submap_entry
->offset
,
8495 submap_entry
->vme_end
-
8496 submap_entry
->vme_start
,
8497 (submap_entry
->is_shared
8499 PMAP_NULL
: map
->pmap
,
8500 submap_entry
->vme_start
,
8505 * Adjust the fault offset to the submap entry.
8507 copy_offset
= (local_vaddr
-
8508 submap_entry
->vme_start
+
8509 submap_entry
->offset
);
8511 /* This works diffently than the */
8512 /* normal submap case. We go back */
8513 /* to the parent of the cow map and*/
8514 /* clip out the target portion of */
8515 /* the sub_map, substituting the */
8516 /* new copy object, */
8519 local_start
= old_start
;
8520 local_end
= old_end
;
8521 map
= cow_sub_map_parent
;
8522 *var_map
= cow_sub_map_parent
;
8523 vaddr
= cow_parent_vaddr
;
8524 cow_sub_map_parent
= NULL
;
8526 if(!vm_map_lookup_entry(map
,
8528 vm_object_deallocate(
8530 vm_map_lock_write_to_read(map
);
8531 return KERN_INVALID_ADDRESS
;
8534 /* clip out the portion of space */
8535 /* mapped by the sub map which */
8536 /* corresponds to the underlying */
8540 * Clip (and unnest) the smallest nested chunk
8541 * possible around the faulting address...
8543 local_start
= vaddr
& ~(pmap_nesting_size_min
- 1);
8544 local_end
= local_start
+ pmap_nesting_size_min
;
8546 * ... but don't go beyond the "old_start" to "old_end"
8547 * range, to avoid spanning over another VM region
8548 * with a possibly different VM object and/or offset.
8550 if (local_start
< old_start
) {
8551 local_start
= old_start
;
8553 if (local_end
> old_end
) {
8554 local_end
= old_end
;
8557 * Adjust copy_offset to the start of the range.
8559 copy_offset
-= (vaddr
- local_start
);
8561 vm_map_clip_start(map
, entry
, local_start
);
8562 vm_map_clip_end(map
, entry
, local_end
);
8563 /* unnesting was done in vm_map_clip_start/end() */
8564 assert(!entry
->use_pmap
);
8566 /* substitute copy object for */
8567 /* shared map entry */
8568 vm_map_deallocate(entry
->object
.sub_map
);
8569 entry
->is_sub_map
= FALSE
;
8570 entry
->object
.vm_object
= copy_object
;
8572 /* propagate the submap entry's protections */
8573 entry
->protection
|= submap_entry
->protection
;
8574 entry
->max_protection
|= submap_entry
->max_protection
;
8577 entry
->offset
= local_start
- old_start
;
8578 entry
->needs_copy
= FALSE
;
8579 entry
->is_shared
= FALSE
;
8581 entry
->offset
= copy_offset
;
8582 entry
->needs_copy
= TRUE
;
8583 if(entry
->inheritance
== VM_INHERIT_SHARE
)
8584 entry
->inheritance
= VM_INHERIT_COPY
;
8586 entry
->is_shared
= TRUE
;
8588 if(entry
->inheritance
== VM_INHERIT_SHARE
)
8589 entry
->inheritance
= VM_INHERIT_COPY
;
8591 vm_map_lock_write_to_read(map
);
8593 if((cow_sub_map_parent
)
8594 && (cow_sub_map_parent
!= *real_map
)
8595 && (cow_sub_map_parent
!= map
)) {
8596 vm_map_unlock(cow_sub_map_parent
);
8598 entry
= submap_entry
;
8599 vaddr
= local_vaddr
;
8604 * Check whether this task is allowed to have
8608 prot
= entry
->protection
;
8610 if (override_nx(map
, entry
->alias
) && prot
) {
8612 * HACK -- if not a stack, then allow execution
8614 prot
|= VM_PROT_EXECUTE
;
8617 if ((fault_type
& (prot
)) != fault_type
) {
8618 if (*real_map
!= map
) {
8619 vm_map_unlock(*real_map
);
8623 if ((fault_type
& VM_PROT_EXECUTE
) && prot
)
8624 log_stack_execution_failure((addr64_t
)vaddr
, prot
);
8626 DTRACE_VM2(prot_fault
, int, 1, (uint64_t *), NULL
);
8627 return KERN_PROTECTION_FAILURE
;
8631 * If this page is not pageable, we have to get
8632 * it for all possible accesses.
8635 *wired
= (entry
->wired_count
!= 0);
8640 * If the entry was copy-on-write, we either ...
8643 if (entry
->needs_copy
) {
8645 * If we want to write the page, we may as well
8646 * handle that now since we've got the map locked.
8648 * If we don't need to write the page, we just
8649 * demote the permissions allowed.
8652 if ((fault_type
& VM_PROT_WRITE
) || *wired
) {
8654 * Make a new object, and place it in the
8655 * object chain. Note that no new references
8656 * have appeared -- one just moved from the
8657 * map to the new object.
8660 if (vm_map_lock_read_to_write(map
)) {
8661 vm_map_lock_read(map
);
8664 vm_object_shadow(&entry
->object
.vm_object
,
8666 (vm_map_size_t
) (entry
->vme_end
-
8669 entry
->object
.vm_object
->shadowed
= TRUE
;
8670 entry
->needs_copy
= FALSE
;
8671 vm_map_lock_write_to_read(map
);
8675 * We're attempting to read a copy-on-write
8676 * page -- don't allow writes.
8679 prot
&= (~VM_PROT_WRITE
);
8684 * Create an object if necessary.
8686 if (entry
->object
.vm_object
== VM_OBJECT_NULL
) {
8688 if (vm_map_lock_read_to_write(map
)) {
8689 vm_map_lock_read(map
);
8693 entry
->object
.vm_object
= vm_object_allocate(
8694 (vm_map_size_t
)(entry
->vme_end
- entry
->vme_start
));
8696 vm_map_lock_write_to_read(map
);
8700 * Return the object/offset from this entry. If the entry
8701 * was copy-on-write or empty, it has been fixed up. Also
8702 * return the protection.
8705 *offset
= (vaddr
- entry
->vme_start
) + entry
->offset
;
8706 *object
= entry
->object
.vm_object
;
8710 fault_info
->interruptible
= THREAD_UNINT
; /* for now... */
8711 /* ... the caller will change "interruptible" if needed */
8712 fault_info
->cluster_size
= 0;
8713 fault_info
->user_tag
= entry
->alias
;
8714 fault_info
->behavior
= entry
->behavior
;
8715 fault_info
->lo_offset
= entry
->offset
;
8716 fault_info
->hi_offset
= (entry
->vme_end
- entry
->vme_start
) + entry
->offset
;
8717 fault_info
->no_cache
= entry
->no_cache
;
8718 fault_info
->stealth
= FALSE
;
8719 fault_info
->mark_zf_absent
= FALSE
;
8723 * Lock the object to prevent it from disappearing
8725 if (object_lock_type
== OBJECT_LOCK_EXCLUSIVE
)
8726 vm_object_lock(*object
);
8728 vm_object_lock_shared(*object
);
8731 * Save the version number
8734 out_version
->main_timestamp
= map
->timestamp
;
8736 return KERN_SUCCESS
;
8743 * Verifies that the map in question has not changed
8744 * since the given version. If successful, the map
8745 * will not change until vm_map_verify_done() is called.
8749 register vm_map_t map
,
8750 register vm_map_version_t
*version
) /* REF */
8754 vm_map_lock_read(map
);
8755 result
= (map
->timestamp
== version
->main_timestamp
);
8758 vm_map_unlock_read(map
);
8764 * vm_map_verify_done:
8766 * Releases locks acquired by a vm_map_verify.
8768 * This is now a macro in vm/vm_map.h. It does a
8769 * vm_map_unlock_read on the map.
8774 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
8775 * Goes away after regular vm_region_recurse function migrates to
8777 * vm_region_recurse: A form of vm_region which follows the
8778 * submaps in a target map
8783 vm_map_region_recurse_64(
8785 vm_map_offset_t
*address
, /* IN/OUT */
8786 vm_map_size_t
*size
, /* OUT */
8787 natural_t
*nesting_depth
, /* IN/OUT */
8788 vm_region_submap_info_64_t submap_info
, /* IN/OUT */
8789 mach_msg_type_number_t
*count
) /* IN/OUT */
8791 vm_region_extended_info_data_t extended
;
8792 vm_map_entry_t tmp_entry
;
8793 vm_map_offset_t user_address
;
8794 unsigned int user_max_depth
;
8797 * "curr_entry" is the VM map entry preceding or including the
8798 * address we're looking for.
8799 * "curr_map" is the map or sub-map containing "curr_entry".
8800 * "curr_offset" is the cumulated offset of "curr_map" in the
8801 * target task's address space.
8802 * "curr_depth" is the depth of "curr_map" in the chain of
8804 * "curr_max_offset" is the maximum offset we should take into
8805 * account in the current map. It may be smaller than the current
8806 * map's "max_offset" because we might not have mapped it all in
8807 * the upper level map.
8809 vm_map_entry_t curr_entry
;
8810 vm_map_offset_t curr_offset
;
8812 unsigned int curr_depth
;
8813 vm_map_offset_t curr_max_offset
;
8816 * "next_" is the same as "curr_" but for the VM region immediately
8817 * after the address we're looking for. We need to keep track of this
8818 * too because we want to return info about that region if the
8819 * address we're looking for is not mapped.
8821 vm_map_entry_t next_entry
;
8822 vm_map_offset_t next_offset
;
8824 unsigned int next_depth
;
8825 vm_map_offset_t next_max_offset
;
8827 boolean_t look_for_pages
;
8828 vm_region_submap_short_info_64_t short_info
;
8830 if (map
== VM_MAP_NULL
) {
8831 /* no address space to work on */
8832 return KERN_INVALID_ARGUMENT
;
8835 if (*count
< VM_REGION_SUBMAP_INFO_COUNT_64
) {
8836 if (*count
< VM_REGION_SUBMAP_SHORT_INFO_COUNT_64
) {
8838 * "info" structure is not big enough and
8841 return KERN_INVALID_ARGUMENT
;
8843 look_for_pages
= FALSE
;
8844 *count
= VM_REGION_SUBMAP_SHORT_INFO_COUNT_64
;
8845 short_info
= (vm_region_submap_short_info_64_t
) submap_info
;
8849 look_for_pages
= TRUE
;
8850 *count
= VM_REGION_SUBMAP_INFO_COUNT_64
;
8855 user_address
= *address
;
8856 user_max_depth
= *nesting_depth
;
8862 curr_max_offset
= curr_map
->max_offset
;
8868 next_max_offset
= curr_max_offset
;
8871 vm_map_lock_read(curr_map
);
8875 if (vm_map_lookup_entry(curr_map
,
8876 user_address
- curr_offset
,
8878 /* tmp_entry contains the address we're looking for */
8879 curr_entry
= tmp_entry
;
8882 * The address is not mapped. "tmp_entry" is the
8883 * map entry preceding the address. We want the next
8884 * one, if it exists.
8886 curr_entry
= tmp_entry
->vme_next
;
8887 if (curr_entry
== vm_map_to_entry(curr_map
) ||
8888 curr_entry
->vme_start
>= curr_max_offset
) {
8889 /* no next entry at this level: stop looking */
8891 vm_map_unlock_read(curr_map
);
8897 curr_max_offset
= 0;
8903 * Is the next entry at this level closer to the address (or
8904 * deeper in the submap chain) than the one we had
8907 tmp_entry
= curr_entry
->vme_next
;
8908 if (tmp_entry
== vm_map_to_entry(curr_map
)) {
8909 /* no next entry at this level */
8910 } else if (tmp_entry
->vme_start
>= curr_max_offset
) {
8912 * tmp_entry is beyond the scope of what we mapped of
8913 * this submap in the upper level: ignore it.
8915 } else if ((next_entry
== NULL
) ||
8916 (tmp_entry
->vme_start
+ curr_offset
<=
8917 next_entry
->vme_start
+ next_offset
)) {
8919 * We didn't have a "next_entry" or this one is
8920 * closer to the address we're looking for:
8921 * use this "tmp_entry" as the new "next_entry".
8923 if (next_entry
!= NULL
) {
8924 /* unlock the last "next_map" */
8925 if (next_map
!= curr_map
&& not_in_kdp
) {
8926 vm_map_unlock_read(next_map
);
8929 next_entry
= tmp_entry
;
8930 next_map
= curr_map
;
8931 next_offset
= curr_offset
;
8932 next_depth
= curr_depth
;
8933 next_max_offset
= curr_max_offset
;
8936 if (!curr_entry
->is_sub_map
||
8937 curr_depth
>= user_max_depth
) {
8939 * We hit a leaf map or we reached the maximum depth
8940 * we could, so stop looking. Keep the current map
8947 * Get down to the next submap level.
8951 * Lock the next level and unlock the current level,
8952 * unless we need to keep it locked to access the "next_entry"
8956 vm_map_lock_read(curr_entry
->object
.sub_map
);
8958 if (curr_map
== next_map
) {
8959 /* keep "next_map" locked in case we need it */
8961 /* release this map */
8963 vm_map_unlock_read(curr_map
);
8967 * Adjust the offset. "curr_entry" maps the submap
8968 * at relative address "curr_entry->vme_start" in the
8969 * curr_map but skips the first "curr_entry->offset"
8970 * bytes of the submap.
8971 * "curr_offset" always represents the offset of a virtual
8972 * address in the curr_map relative to the absolute address
8973 * space (i.e. the top-level VM map).
8976 (curr_entry
->vme_start
- curr_entry
->offset
);
8977 /* switch to the submap */
8978 curr_map
= curr_entry
->object
.sub_map
;
8981 * "curr_max_offset" allows us to keep track of the
8982 * portion of the submap that is actually mapped at this level:
8983 * the rest of that submap is irrelevant to us, since it's not
8985 * The relevant portion of the map starts at
8986 * "curr_entry->offset" up to the size of "curr_entry".
8989 curr_entry
->vme_end
- curr_entry
->vme_start
+
8994 if (curr_entry
== NULL
) {
8995 /* no VM region contains the address... */
8996 if (next_entry
== NULL
) {
8997 /* ... and no VM region follows it either */
8998 return KERN_INVALID_ADDRESS
;
9000 /* ... gather info about the next VM region */
9001 curr_entry
= next_entry
;
9002 curr_map
= next_map
; /* still locked ... */
9003 curr_offset
= next_offset
;
9004 curr_depth
= next_depth
;
9005 curr_max_offset
= next_max_offset
;
9007 /* we won't need "next_entry" after all */
9008 if (next_entry
!= NULL
) {
9009 /* release "next_map" */
9010 if (next_map
!= curr_map
&& not_in_kdp
) {
9011 vm_map_unlock_read(next_map
);
9019 next_max_offset
= 0;
9021 *nesting_depth
= curr_depth
;
9022 *size
= curr_entry
->vme_end
- curr_entry
->vme_start
;
9023 *address
= curr_entry
->vme_start
+ curr_offset
;
9025 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
9026 // so probably should be a real 32b ID vs. ptr.
9027 // Current users just check for equality
9028 #define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)p)
9030 if (look_for_pages
) {
9031 submap_info
->user_tag
= curr_entry
->alias
;
9032 submap_info
->offset
= curr_entry
->offset
;
9033 submap_info
->protection
= curr_entry
->protection
;
9034 submap_info
->inheritance
= curr_entry
->inheritance
;
9035 submap_info
->max_protection
= curr_entry
->max_protection
;
9036 submap_info
->behavior
= curr_entry
->behavior
;
9037 submap_info
->user_wired_count
= curr_entry
->user_wired_count
;
9038 submap_info
->is_submap
= curr_entry
->is_sub_map
;
9039 submap_info
->object_id
= INFO_MAKE_OBJECT_ID(curr_entry
->object
.vm_object
);
9041 short_info
->user_tag
= curr_entry
->alias
;
9042 short_info
->offset
= curr_entry
->offset
;
9043 short_info
->protection
= curr_entry
->protection
;
9044 short_info
->inheritance
= curr_entry
->inheritance
;
9045 short_info
->max_protection
= curr_entry
->max_protection
;
9046 short_info
->behavior
= curr_entry
->behavior
;
9047 short_info
->user_wired_count
= curr_entry
->user_wired_count
;
9048 short_info
->is_submap
= curr_entry
->is_sub_map
;
9049 short_info
->object_id
= INFO_MAKE_OBJECT_ID(curr_entry
->object
.vm_object
);
9052 extended
.pages_resident
= 0;
9053 extended
.pages_swapped_out
= 0;
9054 extended
.pages_shared_now_private
= 0;
9055 extended
.pages_dirtied
= 0;
9056 extended
.external_pager
= 0;
9057 extended
.shadow_depth
= 0;
9060 if (!curr_entry
->is_sub_map
) {
9061 vm_map_region_walk(curr_map
,
9062 curr_entry
->vme_start
,
9065 (curr_entry
->vme_end
-
9066 curr_entry
->vme_start
),
9069 if (extended
.external_pager
&&
9070 extended
.ref_count
== 2 &&
9071 extended
.share_mode
== SM_SHARED
) {
9072 extended
.share_mode
= SM_PRIVATE
;
9075 if (curr_entry
->use_pmap
) {
9076 extended
.share_mode
= SM_TRUESHARED
;
9078 extended
.share_mode
= SM_PRIVATE
;
9080 extended
.ref_count
=
9081 curr_entry
->object
.sub_map
->ref_count
;
9085 if (look_for_pages
) {
9086 submap_info
->pages_resident
= extended
.pages_resident
;
9087 submap_info
->pages_swapped_out
= extended
.pages_swapped_out
;
9088 submap_info
->pages_shared_now_private
=
9089 extended
.pages_shared_now_private
;
9090 submap_info
->pages_dirtied
= extended
.pages_dirtied
;
9091 submap_info
->external_pager
= extended
.external_pager
;
9092 submap_info
->shadow_depth
= extended
.shadow_depth
;
9093 submap_info
->share_mode
= extended
.share_mode
;
9094 submap_info
->ref_count
= extended
.ref_count
;
9096 short_info
->external_pager
= extended
.external_pager
;
9097 short_info
->shadow_depth
= extended
.shadow_depth
;
9098 short_info
->share_mode
= extended
.share_mode
;
9099 short_info
->ref_count
= extended
.ref_count
;
9103 vm_map_unlock_read(curr_map
);
9106 return KERN_SUCCESS
;
9112 * User call to obtain information about a region in
9113 * a task's address map. Currently, only one flavor is
9116 * XXX The reserved and behavior fields cannot be filled
9117 * in until the vm merge from the IK is completed, and
9118 * vm_reserve is implemented.
9124 vm_map_offset_t
*address
, /* IN/OUT */
9125 vm_map_size_t
*size
, /* OUT */
9126 vm_region_flavor_t flavor
, /* IN */
9127 vm_region_info_t info
, /* OUT */
9128 mach_msg_type_number_t
*count
, /* IN/OUT */
9129 mach_port_t
*object_name
) /* OUT */
9131 vm_map_entry_t tmp_entry
;
9132 vm_map_entry_t entry
;
9133 vm_map_offset_t start
;
9135 if (map
== VM_MAP_NULL
)
9136 return(KERN_INVALID_ARGUMENT
);
9140 case VM_REGION_BASIC_INFO
:
9141 /* legacy for old 32-bit objects info */
9143 vm_region_basic_info_t basic
;
9145 if (*count
< VM_REGION_BASIC_INFO_COUNT
)
9146 return(KERN_INVALID_ARGUMENT
);
9148 basic
= (vm_region_basic_info_t
) info
;
9149 *count
= VM_REGION_BASIC_INFO_COUNT
;
9151 vm_map_lock_read(map
);
9154 if (!vm_map_lookup_entry(map
, start
, &tmp_entry
)) {
9155 if ((entry
= tmp_entry
->vme_next
) == vm_map_to_entry(map
)) {
9156 vm_map_unlock_read(map
);
9157 return(KERN_INVALID_ADDRESS
);
9163 start
= entry
->vme_start
;
9165 basic
->offset
= (uint32_t)entry
->offset
;
9166 basic
->protection
= entry
->protection
;
9167 basic
->inheritance
= entry
->inheritance
;
9168 basic
->max_protection
= entry
->max_protection
;
9169 basic
->behavior
= entry
->behavior
;
9170 basic
->user_wired_count
= entry
->user_wired_count
;
9171 basic
->reserved
= entry
->is_sub_map
;
9173 *size
= (entry
->vme_end
- start
);
9175 if (object_name
) *object_name
= IP_NULL
;
9176 if (entry
->is_sub_map
) {
9177 basic
->shared
= FALSE
;
9179 basic
->shared
= entry
->is_shared
;
9182 vm_map_unlock_read(map
);
9183 return(KERN_SUCCESS
);
9186 case VM_REGION_BASIC_INFO_64
:
9188 vm_region_basic_info_64_t basic
;
9190 if (*count
< VM_REGION_BASIC_INFO_COUNT_64
)
9191 return(KERN_INVALID_ARGUMENT
);
9193 basic
= (vm_region_basic_info_64_t
) info
;
9194 *count
= VM_REGION_BASIC_INFO_COUNT_64
;
9196 vm_map_lock_read(map
);
9199 if (!vm_map_lookup_entry(map
, start
, &tmp_entry
)) {
9200 if ((entry
= tmp_entry
->vme_next
) == vm_map_to_entry(map
)) {
9201 vm_map_unlock_read(map
);
9202 return(KERN_INVALID_ADDRESS
);
9208 start
= entry
->vme_start
;
9210 basic
->offset
= entry
->offset
;
9211 basic
->protection
= entry
->protection
;
9212 basic
->inheritance
= entry
->inheritance
;
9213 basic
->max_protection
= entry
->max_protection
;
9214 basic
->behavior
= entry
->behavior
;
9215 basic
->user_wired_count
= entry
->user_wired_count
;
9216 basic
->reserved
= entry
->is_sub_map
;
9218 *size
= (entry
->vme_end
- start
);
9220 if (object_name
) *object_name
= IP_NULL
;
9221 if (entry
->is_sub_map
) {
9222 basic
->shared
= FALSE
;
9224 basic
->shared
= entry
->is_shared
;
9227 vm_map_unlock_read(map
);
9228 return(KERN_SUCCESS
);
9230 case VM_REGION_EXTENDED_INFO
:
9232 vm_region_extended_info_t extended
;
9234 if (*count
< VM_REGION_EXTENDED_INFO_COUNT
)
9235 return(KERN_INVALID_ARGUMENT
);
9237 extended
= (vm_region_extended_info_t
) info
;
9238 *count
= VM_REGION_EXTENDED_INFO_COUNT
;
9240 vm_map_lock_read(map
);
9243 if (!vm_map_lookup_entry(map
, start
, &tmp_entry
)) {
9244 if ((entry
= tmp_entry
->vme_next
) == vm_map_to_entry(map
)) {
9245 vm_map_unlock_read(map
);
9246 return(KERN_INVALID_ADDRESS
);
9251 start
= entry
->vme_start
;
9253 extended
->protection
= entry
->protection
;
9254 extended
->user_tag
= entry
->alias
;
9255 extended
->pages_resident
= 0;
9256 extended
->pages_swapped_out
= 0;
9257 extended
->pages_shared_now_private
= 0;
9258 extended
->pages_dirtied
= 0;
9259 extended
->external_pager
= 0;
9260 extended
->shadow_depth
= 0;
9262 vm_map_region_walk(map
, start
, entry
, entry
->offset
, entry
->vme_end
- start
, extended
, TRUE
);
9264 if (extended
->external_pager
&& extended
->ref_count
== 2 && extended
->share_mode
== SM_SHARED
)
9265 extended
->share_mode
= SM_PRIVATE
;
9268 *object_name
= IP_NULL
;
9270 *size
= (entry
->vme_end
- start
);
9272 vm_map_unlock_read(map
);
9273 return(KERN_SUCCESS
);
9275 case VM_REGION_TOP_INFO
:
9277 vm_region_top_info_t top
;
9279 if (*count
< VM_REGION_TOP_INFO_COUNT
)
9280 return(KERN_INVALID_ARGUMENT
);
9282 top
= (vm_region_top_info_t
) info
;
9283 *count
= VM_REGION_TOP_INFO_COUNT
;
9285 vm_map_lock_read(map
);
9288 if (!vm_map_lookup_entry(map
, start
, &tmp_entry
)) {
9289 if ((entry
= tmp_entry
->vme_next
) == vm_map_to_entry(map
)) {
9290 vm_map_unlock_read(map
);
9291 return(KERN_INVALID_ADDRESS
);
9297 start
= entry
->vme_start
;
9299 top
->private_pages_resident
= 0;
9300 top
->shared_pages_resident
= 0;
9302 vm_map_region_top_walk(entry
, top
);
9305 *object_name
= IP_NULL
;
9307 *size
= (entry
->vme_end
- start
);
9309 vm_map_unlock_read(map
);
9310 return(KERN_SUCCESS
);
9313 return(KERN_INVALID_ARGUMENT
);
9317 #define OBJ_RESIDENT_COUNT(obj, entry_size) \
9319 ((obj)->all_reusable ? \
9320 (obj)->wired_page_count : \
9321 (obj)->resident_page_count - (obj)->reusable_page_count))
9324 vm_map_region_top_walk(
9325 vm_map_entry_t entry
,
9326 vm_region_top_info_t top
)
9329 if (entry
->object
.vm_object
== 0 || entry
->is_sub_map
) {
9330 top
->share_mode
= SM_EMPTY
;
9337 struct vm_object
*obj
, *tmp_obj
;
9339 uint32_t entry_size
;
9341 entry_size
= (uint32_t) ((entry
->vme_end
- entry
->vme_start
) / PAGE_SIZE_64
);
9343 obj
= entry
->object
.vm_object
;
9345 vm_object_lock(obj
);
9347 if ((ref_count
= obj
->ref_count
) > 1 && obj
->paging_in_progress
)
9350 assert(obj
->reusable_page_count
<= obj
->resident_page_count
);
9353 top
->private_pages_resident
=
9354 OBJ_RESIDENT_COUNT(obj
, entry_size
);
9356 top
->shared_pages_resident
=
9357 OBJ_RESIDENT_COUNT(obj
, entry_size
);
9358 top
->ref_count
= ref_count
;
9359 top
->share_mode
= SM_COW
;
9361 while ((tmp_obj
= obj
->shadow
)) {
9362 vm_object_lock(tmp_obj
);
9363 vm_object_unlock(obj
);
9366 if ((ref_count
= obj
->ref_count
) > 1 && obj
->paging_in_progress
)
9369 assert(obj
->reusable_page_count
<= obj
->resident_page_count
);
9370 top
->shared_pages_resident
+=
9371 OBJ_RESIDENT_COUNT(obj
, entry_size
);
9372 top
->ref_count
+= ref_count
- 1;
9375 if (entry
->needs_copy
) {
9376 top
->share_mode
= SM_COW
;
9377 top
->shared_pages_resident
=
9378 OBJ_RESIDENT_COUNT(obj
, entry_size
);
9380 if (ref_count
== 1 ||
9381 (ref_count
== 2 && !(obj
->pager_trusted
) && !(obj
->internal
))) {
9382 top
->share_mode
= SM_PRIVATE
;
9383 top
->private_pages_resident
=
9384 OBJ_RESIDENT_COUNT(obj
,
9387 top
->share_mode
= SM_SHARED
;
9388 top
->shared_pages_resident
=
9389 OBJ_RESIDENT_COUNT(obj
,
9393 top
->ref_count
= ref_count
;
9395 /* XXX K64: obj_id will be truncated */
9396 top
->obj_id
= (unsigned int) (uintptr_t)obj
;
9398 vm_object_unlock(obj
);
void
vm_map_region_walk(
    vm_map_t                    map,
    vm_map_offset_t             va,
    vm_map_entry_t              entry,
    vm_object_offset_t          offset,
    vm_object_size_t            range,
    vm_region_extended_info_t   extended,
    boolean_t                   look_for_pages)
{
    register struct vm_object   *obj, *tmp_obj;
    register vm_map_offset_t    last_offset;
    register int                i;
    register int                ref_count;
    struct vm_object            *shadow_object;
    int                         shadow_depth;

    if ((entry->object.vm_object == 0) ||
        (entry->is_sub_map) ||
        (entry->object.vm_object->phys_contiguous)) {
        extended->share_mode = SM_EMPTY;
        extended->ref_count = 0;
        return;
    }

    obj = entry->object.vm_object;

    vm_object_lock(obj);

    if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
        ref_count--;

    if (look_for_pages) {
        for (last_offset = offset + range;
             offset < last_offset;
             offset += PAGE_SIZE_64, va += PAGE_SIZE)
            vm_map_region_look_for_page(map, va, obj,
                                        offset, ref_count,
                                        0, extended);
    } else {
        shadow_object = obj->shadow;
        shadow_depth = 0;

        if ( !(obj->pager_trusted) && !(obj->internal))
            extended->external_pager = 1;

        if (shadow_object != VM_OBJECT_NULL) {
            vm_object_lock(shadow_object);
            for (;
                 shadow_object != VM_OBJECT_NULL;
                 shadow_depth++) {
                vm_object_t next_shadow;

                if ( !(shadow_object->pager_trusted) &&
                     !(shadow_object->internal))
                    extended->external_pager = 1;

                next_shadow = shadow_object->shadow;
                if (next_shadow) {
                    vm_object_lock(next_shadow);
                }
                vm_object_unlock(shadow_object);
                shadow_object = next_shadow;
            }
        }
        extended->shadow_depth = shadow_depth;
    }

    if (extended->shadow_depth || entry->needs_copy)
        extended->share_mode = SM_COW;
    else {
        if (ref_count == 1)
            extended->share_mode = SM_PRIVATE;
        else {
            if (obj->true_share)
                extended->share_mode = SM_TRUESHARED;
            else
                extended->share_mode = SM_SHARED;
        }
    }
    extended->ref_count = ref_count - extended->shadow_depth;

    for (i = 0; i < extended->shadow_depth; i++) {
        if ((tmp_obj = obj->shadow) == 0)
            break;
        vm_object_lock(tmp_obj);
        vm_object_unlock(obj);

        if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
            ref_count--;

        extended->ref_count += ref_count;
        obj = tmp_obj;
    }
    vm_object_unlock(obj);

    if (extended->share_mode == SM_SHARED) {
        register vm_map_entry_t cur;
        register vm_map_entry_t last;
        int                     my_refs;

        obj = entry->object.vm_object;
        last = vm_map_to_entry(map);
        my_refs = 0;

        if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
            ref_count--;
        for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
            my_refs += vm_map_region_count_obj_refs(cur, obj);

        if (my_refs == ref_count)
            extended->share_mode = SM_PRIVATE_ALIASED;
        else if (my_refs > 1)
            extended->share_mode = SM_SHARED_ALIASED;
    }
}
/* object is locked on entry and locked on return */

static void
vm_map_region_look_for_page(
    __unused vm_map_t           map,
    __unused vm_map_offset_t    va,
    vm_object_t                 object,
    vm_object_offset_t          offset,
    int                         max_refcnt,
    int                         depth,
    vm_region_extended_info_t   extended)
{
    register vm_page_t      p;
    register vm_object_t    shadow;
    register int            ref_count;
    vm_object_t             caller_object;
#if MACH_PAGEMAP
    kern_return_t           kr;
#endif
    shadow = object->shadow;
    caller_object = object;

    while (TRUE) {

        if ( !(object->pager_trusted) && !(object->internal))
            extended->external_pager = 1;

        if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
            if (shadow && (max_refcnt == 1))
                extended->pages_shared_now_private++;

            if (!p->fictitious &&
                (p->dirty || pmap_is_modified(p->phys_page)))
                extended->pages_dirtied++;

            extended->pages_resident++;

            if (object != caller_object)
                vm_object_unlock(object);

            return;
        }
#if MACH_PAGEMAP
        if (object->existence_map) {
            if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) {

                extended->pages_swapped_out++;

                if (object != caller_object)
                    vm_object_unlock(object);

                return;
            }
        } else if (object->internal &&
                   object->alive &&
                   !object->terminating &&
                   object->pager_ready) {

            memory_object_t pager;

            vm_object_paging_begin(object);
            pager = object->pager;
            vm_object_unlock(object);

            kr = memory_object_data_request(
                pager,
                offset + object->paging_offset,
                0, /* just poke the pager */
                VM_PROT_READ,
                NULL);

            vm_object_lock(object);
            vm_object_paging_end(object);

            if (kr == KERN_SUCCESS) {
                /* the pager has that page */
                extended->pages_swapped_out++;
                if (object != caller_object)
                    vm_object_unlock(object);
                return;
            }
        }
#endif /* MACH_PAGEMAP */

        if (shadow) {
            vm_object_lock(shadow);

            if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
                ref_count--;

            if (++depth > extended->shadow_depth)
                extended->shadow_depth = depth;

            if (ref_count > max_refcnt)
                max_refcnt = ref_count;

            if (object != caller_object)
                vm_object_unlock(object);

            offset = offset + object->shadow_offset;
            object = shadow;
            shadow = object->shadow;
            continue;
        }
        if (object != caller_object)
            vm_object_unlock(object);
        break;
    }
}
static int
vm_map_region_count_obj_refs(
    vm_map_entry_t  entry,
    vm_object_t     object)
{
    register int            ref_count;
    register vm_object_t    chk_obj;
    register vm_object_t    tmp_obj;

    if (entry->object.vm_object == 0)
        return(0);

    if (entry->is_sub_map)
        return(0);
    else {
        ref_count = 0;

        chk_obj = entry->object.vm_object;
        vm_object_lock(chk_obj);

        while (chk_obj) {
            if (chk_obj == object)
                ref_count++;
            tmp_obj = chk_obj->shadow;
            if (tmp_obj)
                vm_object_lock(tmp_obj);
            vm_object_unlock(chk_obj);

            chk_obj = tmp_obj;
        }
    }
    return(ref_count);
}
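/*
 * Illustrative sketch (not part of this file): the region walkers above back
 * the vm_region()/mach_vm_region() info flavors such as VM_REGION_TOP_INFO.
 * A minimal user-space caller, assuming only the public Mach headers, might
 * look like the following; the function name is hypothetical.
 */
#if 0   /* user-space example, for illustration only */
#include <mach/mach.h>
#include <stdio.h>

static void
show_region_top_info(mach_vm_address_t addr)
{
    mach_vm_address_t           address = addr;
    mach_vm_size_t              size = 0;
    vm_region_top_info_data_t   info;
    mach_msg_type_number_t      count = VM_REGION_TOP_INFO_COUNT;
    mach_port_t                 object_name = MACH_PORT_NULL;

    kern_return_t kr = mach_vm_region(mach_task_self(), &address, &size,
                                      VM_REGION_TOP_INFO,
                                      (vm_region_info_t) &info,
                                      &count, &object_name);
    if (kr == KERN_SUCCESS)
        printf("region 0x%llx..0x%llx share_mode=%d private=%u shared=%u\n",
               (unsigned long long) address,
               (unsigned long long) (address + size),
               info.share_mode,
               info.private_pages_resident,
               info.shared_pages_resident);
}
#endif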
/*
 *  Routine:    vm_map_simplify
 *
 *  Description:
 *      Attempt to simplify the map representation in
 *      the vicinity of the given starting address.
 *
 *      This routine is intended primarily to keep the
 *      kernel maps more compact -- they generally don't
 *      benefit from the "expand a map entry" technology
 *      at allocation time because the adjacent entry
 *      is often wired down.
 */
void
vm_map_simplify_entry(
    vm_map_t        map,
    vm_map_entry_t  this_entry)
{
    vm_map_entry_t  prev_entry;

    counter(c_vm_map_simplify_entry_called++);

    prev_entry = this_entry->vme_prev;

    if ((this_entry != vm_map_to_entry(map)) &&
        (prev_entry != vm_map_to_entry(map)) &&

        (prev_entry->vme_end == this_entry->vme_start) &&

        (prev_entry->is_sub_map == this_entry->is_sub_map) &&

        (prev_entry->object.vm_object == this_entry->object.vm_object) &&
        ((prev_entry->offset + (prev_entry->vme_end -
                                prev_entry->vme_start))
         == this_entry->offset) &&

        (prev_entry->inheritance == this_entry->inheritance) &&
        (prev_entry->protection == this_entry->protection) &&
        (prev_entry->max_protection == this_entry->max_protection) &&
        (prev_entry->behavior == this_entry->behavior) &&
        (prev_entry->alias == this_entry->alias) &&
        (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
        (prev_entry->no_cache == this_entry->no_cache) &&
        (prev_entry->wired_count == this_entry->wired_count) &&
        (prev_entry->user_wired_count == this_entry->user_wired_count) &&

        (prev_entry->needs_copy == this_entry->needs_copy) &&
        (prev_entry->permanent == this_entry->permanent) &&

        (prev_entry->use_pmap == FALSE) &&
        (this_entry->use_pmap == FALSE) &&
        (prev_entry->in_transition == FALSE) &&
        (this_entry->in_transition == FALSE) &&
        (prev_entry->needs_wakeup == FALSE) &&
        (this_entry->needs_wakeup == FALSE) &&
        (prev_entry->is_shared == FALSE) &&
        (this_entry->is_shared == FALSE)) {
        _vm_map_entry_unlink(&map->hdr, prev_entry);
        this_entry->vme_start = prev_entry->vme_start;
        this_entry->offset = prev_entry->offset;
        if (prev_entry->is_sub_map) {
            vm_map_deallocate(prev_entry->object.sub_map);
        } else {
            vm_object_deallocate(prev_entry->object.vm_object);
        }
        vm_map_entry_dispose(map, prev_entry);
        SAVE_HINT_MAP_WRITE(map, this_entry);
        counter(c_vm_map_simplified++);
    }
}
void
vm_map_simplify(
    vm_map_t        map,
    vm_map_offset_t start)
{
    vm_map_entry_t  this_entry;

    vm_map_lock(map);
    if (vm_map_lookup_entry(map, start, &this_entry)) {
        vm_map_simplify_entry(map, this_entry);
        vm_map_simplify_entry(map, this_entry->vme_next);
    }
    counter(c_vm_map_simplify_called++);
    vm_map_unlock(map);
}
void
vm_map_simplify_range(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_map_offset_t end)
{
    vm_map_entry_t  entry;

    /*
     * The map should be locked (for "write") by the caller.
     */

    if (start >= end) {
        /* invalid address range */
        return;
    }

    start = vm_map_trunc_page(start);
    end = vm_map_round_page(end);

    if (!vm_map_lookup_entry(map, start, &entry)) {
        /* "start" is not mapped and "entry" ends before "start" */
        if (entry == vm_map_to_entry(map)) {
            /* start with first entry in the map */
            entry = vm_map_first_entry(map);
        } else {
            /* start with next entry */
            entry = entry->vme_next;
        }
    }

    while (entry != vm_map_to_entry(map) &&
           entry->vme_start <= end) {
        /* try and coalesce "entry" with its previous entry */
        vm_map_simplify_entry(map, entry);
        entry = entry->vme_next;
    }
}
/*
 *  Routine:    vm_map_machine_attribute
 *  Purpose:
 *      Provide machine-specific attributes to mappings,
 *      such as cachability etc. for machines that provide
 *      them.  NUMA architectures and machines with big/strange
 *      caches will use this.
 *  Note:
 *      Responsibilities for locking and checking are handled here,
 *      everything else in the pmap module.  If any non-volatile
 *      information must be kept, the pmap module should handle
 *      it itself.  [This assumes that attributes do not
 *      need to be inherited, which seems ok to me]
 */
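/*
 * Illustrative sketch (not part of this file): user code reaches this path
 * through the Mach vm_machine_attribute() call.  Assuming only the public
 * headers, flushing the cache for a range of the caller's own map might
 * look like this; the helper name is hypothetical.
 */
#if 0   /* user-space example, for illustration only */
#include <mach/mach.h>
#include <mach/vm_attributes.h>

static kern_return_t
flush_cache_range(vm_address_t addr, vm_size_t len)
{
    vm_machine_attribute_val_t value = MATTR_VAL_CACHE_FLUSH;

    /* MATTR_CACHE is the attribute that forces the explicit per-page
     * object traversal performed by vm_map_machine_attribute(). */
    return vm_machine_attribute(mach_task_self(), addr, len,
                                MATTR_CACHE, &value);
}
#endif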
kern_return_t
vm_map_machine_attribute(
    vm_map_t                    map,
    vm_map_offset_t             start,
    vm_map_offset_t             end,
    vm_machine_attribute_t      attribute,
    vm_machine_attribute_val_t  *value)     /* IN/OUT */
{
    kern_return_t   ret;
    vm_map_size_t   sync_size;
    vm_map_entry_t  entry;

    if (start < vm_map_min(map) || end > vm_map_max(map))
        return KERN_INVALID_ADDRESS;

    /* Figure how much memory we need to flush (in page increments) */
    sync_size = end - start;

    vm_map_lock(map);

    if (attribute != MATTR_CACHE) {
        /* If we don't have to find physical addresses, we */
        /* don't have to do an explicit traversal here.    */
        ret = pmap_attribute(map->pmap, start, end-start,
                             attribute, value);
        vm_map_unlock(map);
        return ret;
    }

    ret = KERN_SUCCESS;     /* Assume it all worked */

    while (sync_size) {
        if (vm_map_lookup_entry(map, start, &entry)) {
            vm_map_size_t   sub_size;

            if ((entry->vme_end - start) > sync_size) {
                sub_size = sync_size;
                sync_size = 0;
            } else {
                sub_size = entry->vme_end - start;
                sync_size -= sub_size;
            }
            if (entry->is_sub_map) {
                vm_map_offset_t sub_start;
                vm_map_offset_t sub_end;

                sub_start = (start - entry->vme_start)
                    + entry->offset;
                sub_end = sub_start + sub_size;
                vm_map_machine_attribute(
                    entry->object.sub_map,
                    sub_start, sub_end,
                    attribute, value);
            } else {
                if (entry->object.vm_object) {
                    vm_page_t           m;
                    vm_object_t         object;
                    vm_object_t         base_object;
                    vm_object_t         last_object;
                    vm_object_offset_t  offset;
                    vm_object_offset_t  base_offset;
                    vm_map_size_t       range;

                    range = sub_size;
                    offset = (start - entry->vme_start)
                        + entry->offset;
                    base_offset = offset;
                    object = entry->object.vm_object;
                    base_object = object;
                    last_object = NULL;

                    vm_object_lock(object);

                    while (range) {
                        m = vm_page_lookup(object, offset);

                        if (m && !m->fictitious) {
                            ret = pmap_attribute_cache_sync(
                                m->phys_page,
                                PAGE_SIZE,
                                attribute, value);
                        } else if (object->shadow) {
                            offset = offset + object->shadow_offset;
                            last_object = object;
                            object = object->shadow;
                            vm_object_lock(last_object->shadow);
                            vm_object_unlock(last_object);
                            continue;
                        }
                        range -= PAGE_SIZE;

                        if (base_object != object) {
                            vm_object_unlock(object);
                            vm_object_lock(base_object);
                            object = base_object;
                        }
                        /* Bump to the next page */
                        base_offset += PAGE_SIZE;
                        offset = base_offset;
                    }
                    vm_object_unlock(object);
                }
            }
            start += sub_size;
        } else {
            vm_map_unlock(map);
            return KERN_FAILURE;
        }
    }

    vm_map_unlock(map);

    return ret;
}
/*
 *  vm_map_behavior_set:
 *
 *  Sets the paging reference behavior of the specified address
 *  range in the target map.  Paging reference behavior affects
 *  how pagein operations resulting from faults on the map will be
 *  clustered.
 */
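/*
 * Illustrative sketch (not part of this file): the persistent behaviors set
 * here are normally requested from user space with madvise(2), e.g.
 * MADV_SEQUENTIAL or MADV_RANDOM, which the BSD layer translates into a
 * vm_map_behavior_set() call on the calling task's map.
 */
#if 0   /* user-space example, for illustration only */
#include <sys/mman.h>

static void
hint_sequential(void *addr, size_t len)
{
    /* Ask for aggressive read-ahead and early reclaim on this range. */
    (void) madvise(addr, len, MADV_SEQUENTIAL);
}
#endif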
kern_return_t
vm_map_behavior_set(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_map_offset_t end,
    vm_behavior_t   new_behavior)
{
    register vm_map_entry_t entry;
    vm_map_entry_t          temp_entry;

    XPR(XPR_VM_MAP,
        "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
        map, start, end, new_behavior, 0);

    switch (new_behavior) {

    /*
     * This first block of behaviors all set a persistent state on the specified
     * memory range.  All we have to do here is to record the desired behavior
     * in the vm_map_entry_t's.
     */

    case VM_BEHAVIOR_DEFAULT:
    case VM_BEHAVIOR_RANDOM:
    case VM_BEHAVIOR_SEQUENTIAL:
    case VM_BEHAVIOR_RSEQNTL:
    case VM_BEHAVIOR_ZERO_WIRED_PAGES:
        vm_map_lock(map);

        /*
         * The entire address range must be valid for the map.
         * Note that vm_map_range_check() does a
         * vm_map_lookup_entry() internally and returns the
         * entry containing the start of the address range if
         * the entire range is valid.
         */
        if (vm_map_range_check(map, start, end, &temp_entry)) {
            entry = temp_entry;
            vm_map_clip_start(map, entry, start);
        } else {
            vm_map_unlock(map);
            return(KERN_INVALID_ADDRESS);
        }

        while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
            vm_map_clip_end(map, entry, end);
            assert(!entry->use_pmap);

            if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
                entry->zero_wired_pages = TRUE;
            } else {
                entry->behavior = new_behavior;
            }
            entry = entry->vme_next;
        }

        vm_map_unlock(map);
        break;

    /*
     * The rest of these are different from the above in that they cause
     * an immediate action to take place as opposed to setting a behavior that
     * affects future actions.
     */

    case VM_BEHAVIOR_WILLNEED:
        return vm_map_willneed(map, start, end);

    case VM_BEHAVIOR_DONTNEED:
        return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);

    case VM_BEHAVIOR_FREE:
        return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);

    case VM_BEHAVIOR_REUSABLE:
        return vm_map_reusable_pages(map, start, end);

    case VM_BEHAVIOR_REUSE:
        return vm_map_reuse_pages(map, start, end);

    case VM_BEHAVIOR_CAN_REUSE:
        return vm_map_can_reuse(map, start, end);

    default:
        return(KERN_INVALID_ARGUMENT);
    }

    return(KERN_SUCCESS);
}
/*
 * Internals for madvise(MADV_WILLNEED) system call.
 *
 * The present implementation is to do a read-ahead if the mapping corresponds
 * to a mapped regular file.  If it's an anonymous mapping, then we do nothing
 * and basically ignore the "advice" (which we are always free to do).
 */
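/*
 * Illustrative sketch (not part of this file): a typical caller maps a file
 * and then issues madvise(MADV_WILLNEED) to trigger the asynchronous
 * read-ahead performed below.  The helper name is hypothetical.
 */
#if 0   /* user-space example, for illustration only */
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

static void *
map_and_prefetch(const char *path, size_t *lenp)
{
    struct stat st;
    int fd = open(path, O_RDONLY);
    if (fd < 0)
        return NULL;
    if (fstat(fd, &st) < 0) {
        close(fd);
        return NULL;
    }
    void *p = mmap(NULL, (size_t)st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
    close(fd);
    if (p == MAP_FAILED)
        return NULL;
    /* Kick off read-ahead; failure is deliberately ignored (it's advice). */
    (void) madvise(p, (size_t)st.st_size, MADV_WILLNEED);
    *lenp = (size_t)st.st_size;
    return p;
}
#endif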
static kern_return_t
vm_map_willneed(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_map_offset_t end
)
{
    vm_map_entry_t              entry;
    vm_object_t                 object;
    memory_object_t             pager;
    struct vm_object_fault_info fault_info;
    kern_return_t               kr;
    vm_object_size_t            len;
    vm_object_offset_t          offset;

    /*
     * Fill in static values in fault_info.  Several fields get ignored by the code
     * we call, but we'll fill them in anyway since uninitialized fields are bad
     * when it comes to future backwards compatibility.
     */

    fault_info.interruptible = THREAD_UNINT;        /* ignored value */
    fault_info.behavior      = VM_BEHAVIOR_SEQUENTIAL;
    fault_info.no_cache      = FALSE;               /* ignored value */
    fault_info.stealth       = TRUE;
    fault_info.mark_zf_absent = FALSE;

    /*
     * The MADV_WILLNEED operation doesn't require any changes to the
     * vm_map_entry_t's, so the read lock is sufficient.
     */

    vm_map_lock_read(map);

    /*
     * The madvise semantics require that the address range be fully
     * allocated with no holes.  Otherwise, we're required to return
     * an error.
     */

    if (vm_map_range_check(map, start, end, &entry)) {

        /*
         * Examine each vm_map_entry_t in the range.
         */

        for (; entry->vme_start < end; start += len, entry = entry->vme_next) {

            /*
             * The first time through, the start address could be anywhere within the
             * vm_map_entry we found.  So adjust the offset to correspond.  After that,
             * the offset will always be zero to correspond to the beginning of the current
             * vm_map_entry.
             */

            offset = (start - entry->vme_start) + entry->offset;

            /*
             * Set the length so we don't go beyond the end of the map_entry or beyond the
             * end of the range we were given.  This range could span also multiple map
             * entries all of which map different files, so make sure we only do the right
             * amount of I/O for each object.  Note that it's possible for there to be
             * multiple map entries all referring to the same object but with different
             * page permissions, but it's not worth trying to optimize that case.
             */

            len = MIN(entry->vme_end - start, end - start);

            if ((vm_size_t) len != len) {
                /* 32-bit overflow */
                len = (vm_size_t) (0 - PAGE_SIZE);
            }
            fault_info.cluster_size = (vm_size_t) len;
            fault_info.lo_offset    = offset;
            fault_info.hi_offset    = offset + len;
            fault_info.user_tag     = entry->alias;

            /*
             * If there's no read permission to this mapping, then just skip it.
             */

            if ((entry->protection & VM_PROT_READ) == 0) {
                continue;
            }

            /*
             * Find the file object backing this map entry.  If there is none,
             * then we simply ignore the "will need" advice for this entry and
             * go on to the next one.
             */

            if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
                continue;
            }

            vm_object_paging_begin(object);
            pager = object->pager;
            vm_object_unlock(object);

            /*
             * Get the data from the object asynchronously.
             *
             * Note that memory_object_data_request() places limits on the amount
             * of I/O it will do.  Regardless of the len we specified, it won't do
             * more than MAX_UPL_TRANSFER and it silently truncates the len to that
             * size.  This isn't necessarily bad since madvise shouldn't really be
             * used to page in unlimited amounts of data.  Other Unix variants limit
             * the willneed case as well.  If this turns out to be an issue for
             * developers, then we can always adjust the policy here and still be
             * backwards compatible since this is all just "advice".
             */

            kr = memory_object_data_request(
                pager,
                offset + object->paging_offset,
                0,
                VM_PROT_READ,
                (memory_object_fault_info_t)&fault_info);

            vm_object_lock(object);
            vm_object_paging_end(object);
            vm_object_unlock(object);

            /*
             * If we couldn't do the I/O for some reason, just give up on the
             * madvise.  We still return success to the user since madvise isn't
             * supposed to fail when the advice can't be taken.
             */

            if (kr != KERN_SUCCESS) {
                break;
            }
        }

        kr = KERN_SUCCESS;
    } else
        kr = KERN_INVALID_ADDRESS;

    vm_map_unlock_read(map);
    return kr;
}
static boolean_t
vm_map_entry_is_reusable(
    vm_map_entry_t entry)
{
    vm_object_t object;

    if (entry->is_shared ||
        entry->is_sub_map ||
        entry->in_transition ||
        entry->protection != VM_PROT_DEFAULT ||
        entry->max_protection != VM_PROT_ALL ||
        entry->inheritance != VM_INHERIT_DEFAULT ||
        entry->permanent ||
        entry->superpage_size != 0 ||
        entry->zero_wired_pages ||
        entry->wired_count != 0 ||
        entry->user_wired_count != 0) {
        return FALSE;
    }

    object = entry->object.vm_object;
    if (object == VM_OBJECT_NULL) {
        return TRUE;
    }
    if (object->ref_count == 1 &&
        object->wired_page_count == 0 &&
        object->copy == VM_OBJECT_NULL &&
        object->shadow == VM_OBJECT_NULL &&
        object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
        object->internal &&
        !object->true_share &&
        object->wimg_bits == VM_WIMG_DEFAULT &&
        !object->code_signed) {
        return TRUE;
    }
    return FALSE;
}
static kern_return_t
vm_map_reuse_pages(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_map_offset_t end)
{
    vm_map_entry_t      entry;
    vm_object_t         object;
    vm_object_offset_t  start_offset, end_offset;

    /*
     * The MADV_REUSE operation doesn't require any changes to the
     * vm_map_entry_t's, so the read lock is sufficient.
     */

    vm_map_lock_read(map);

    /*
     * The madvise semantics require that the address range be fully
     * allocated with no holes.  Otherwise, we're required to return
     * an error.
     */

    if (!vm_map_range_check(map, start, end, &entry)) {
        vm_map_unlock_read(map);
        vm_page_stats_reusable.reuse_pages_failure++;
        return KERN_INVALID_ADDRESS;
    }

    /*
     * Examine each vm_map_entry_t in the range.
     */
    for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
         entry = entry->vme_next) {
        /*
         * Sanity check on the VM map entry.
         */
        if (! vm_map_entry_is_reusable(entry)) {
            vm_map_unlock_read(map);
            vm_page_stats_reusable.reuse_pages_failure++;
            return KERN_INVALID_ADDRESS;
        }

        /*
         * The first time through, the start address could be anywhere
         * within the vm_map_entry we found.  So adjust the offset to
         * correspond.
         */
        if (entry->vme_start < start) {
            start_offset = start - entry->vme_start;
        } else {
            start_offset = 0;
        }
        end_offset = MIN(end, entry->vme_end) - entry->vme_start;
        start_offset += entry->offset;
        end_offset += entry->offset;

        object = entry->object.vm_object;
        if (object != VM_OBJECT_NULL) {
            vm_object_lock(object);
            vm_object_reuse_pages(object, start_offset, end_offset,
                                  TRUE);
            vm_object_unlock(object);
        }

        if (entry->alias == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
            /*
             * We do not hold the VM map exclusively here.
             * The "alias" field is not that critical, so it's
             * safe to update it here, as long as it is the only
             * one that can be modified while holding the VM map
             * "shared".
             */
            entry->alias = VM_MEMORY_MALLOC_LARGE_REUSED;
        }
    }

    vm_map_unlock_read(map);
    vm_page_stats_reusable.reuse_pages_success++;
    return KERN_SUCCESS;
}
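/*
 * Illustrative sketch (not part of this file): vm_map_reusable_pages() and
 * vm_map_reuse_pages() back the Darwin-specific madvise advice values
 * MADV_FREE_REUSABLE and MADV_FREE_REUSE, which a malloc-style allocator
 * can use to hand idle pages back to the system and later reclaim the range.
 * The helper names below are hypothetical.
 */
#if 0   /* user-space example, for illustration only */
#include <sys/mman.h>

static void
allocator_retire_range(void *addr, size_t len)
{
    /* Pages keep their mapping but may be reclaimed at any time. */
    (void) madvise(addr, len, MADV_FREE_REUSABLE);
}

static void
allocator_reuse_range(void *addr, size_t len)
{
    /* Tell the kernel the range is in use again before touching it. */
    (void) madvise(addr, len, MADV_FREE_REUSE);
}
#endif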
static kern_return_t
vm_map_reusable_pages(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_map_offset_t end)
{
    vm_map_entry_t      entry;
    vm_object_t         object;
    vm_object_offset_t  start_offset, end_offset;

    /*
     * The MADV_REUSABLE operation doesn't require any changes to the
     * vm_map_entry_t's, so the read lock is sufficient.
     */

    vm_map_lock_read(map);

    /*
     * The madvise semantics require that the address range be fully
     * allocated with no holes.  Otherwise, we're required to return
     * an error.
     */

    if (!vm_map_range_check(map, start, end, &entry)) {
        vm_map_unlock_read(map);
        vm_page_stats_reusable.reusable_pages_failure++;
        return KERN_INVALID_ADDRESS;
    }

    /*
     * Examine each vm_map_entry_t in the range.
     */
    for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
         entry = entry->vme_next) {
        int kill_pages = 0;

        /*
         * Sanity check on the VM map entry.
         */
        if (! vm_map_entry_is_reusable(entry)) {
            vm_map_unlock_read(map);
            vm_page_stats_reusable.reusable_pages_failure++;
            return KERN_INVALID_ADDRESS;
        }

        /*
         * The first time through, the start address could be anywhere
         * within the vm_map_entry we found.  So adjust the offset to
         * correspond.
         */
        if (entry->vme_start < start) {
            start_offset = start - entry->vme_start;
        } else {
            start_offset = 0;
        }
        end_offset = MIN(end, entry->vme_end) - entry->vme_start;
        start_offset += entry->offset;
        end_offset += entry->offset;

        object = entry->object.vm_object;
        if (object == VM_OBJECT_NULL)
            continue;

        vm_object_lock(object);
        if (object->ref_count == 1 && !object->shadow)
            kill_pages = 1;
        else
            kill_pages = -1;
        if (kill_pages != -1) {
            vm_object_deactivate_pages(object,
                                       start_offset,
                                       end_offset - start_offset,
                                       kill_pages,
                                       TRUE /*reusable_pages*/);
        } else {
            vm_page_stats_reusable.reusable_pages_shared++;
        }
        vm_object_unlock(object);

        if (entry->alias == VM_MEMORY_MALLOC_LARGE ||
            entry->alias == VM_MEMORY_MALLOC_LARGE_REUSED) {
            /*
             * We do not hold the VM map exclusively here.
             * The "alias" field is not that critical, so it's
             * safe to update it here, as long as it is the only
             * one that can be modified while holding the VM map
             * "shared".
             */
            entry->alias = VM_MEMORY_MALLOC_LARGE_REUSABLE;
        }
    }

    vm_map_unlock_read(map);
    vm_page_stats_reusable.reusable_pages_success++;
    return KERN_SUCCESS;
}
static kern_return_t
vm_map_can_reuse(
    vm_map_t        map,
    vm_map_offset_t start,
    vm_map_offset_t end)
{
    vm_map_entry_t  entry;

    /*
     * The MADV_REUSABLE operation doesn't require any changes to the
     * vm_map_entry_t's, so the read lock is sufficient.
     */

    vm_map_lock_read(map);

    /*
     * The madvise semantics require that the address range be fully
     * allocated with no holes.  Otherwise, we're required to return
     * an error.
     */

    if (!vm_map_range_check(map, start, end, &entry)) {
        vm_map_unlock_read(map);
        vm_page_stats_reusable.can_reuse_failure++;
        return KERN_INVALID_ADDRESS;
    }

    /*
     * Examine each vm_map_entry_t in the range.
     */
    for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
         entry = entry->vme_next) {
        /*
         * Sanity check on the VM map entry.
         */
        if (! vm_map_entry_is_reusable(entry)) {
            vm_map_unlock_read(map);
            vm_page_stats_reusable.can_reuse_failure++;
            return KERN_INVALID_ADDRESS;
        }
    }

    vm_map_unlock_read(map);
    vm_page_stats_reusable.can_reuse_success++;
    return KERN_SUCCESS;
}
#include <mach_kdb.h>
#if MACH_KDB
#include <ddb/db_output.h>
#include <vm/vm_print.h>

#define printf  db_printf

/*
 * Forward declarations for internal functions.
 */
extern void vm_map_links_print(
    struct vm_map_links *links);

extern void vm_map_header_print(
    struct vm_map_header *header);

extern void vm_map_entry_print(
    vm_map_entry_t      entry);

extern void vm_follow_entry(
    vm_map_entry_t      entry);

extern void vm_follow_map(
    vm_map_t            map);

/*
 *  vm_map_links_print: [ debug ]
 */
void
vm_map_links_print(
    struct vm_map_links *links)
{
    iprintf("prev = %08X  next = %08X  start = %016llX  end = %016llX\n",
            links->prev,
            links->next,
            (unsigned long long)links->start,
            (unsigned long long)links->end);
}

/*
 *  vm_map_header_print:    [ debug ]
 */
void
vm_map_header_print(
    struct vm_map_header *header)
{
    vm_map_links_print(&header->links);
    iprintf("nentries = %08X, %sentries_pageable\n",
            header->nentries,
            (header->entries_pageable ? "" : "!"));
}

/*
 *  vm_follow_entry:    [ debug ]
 */
void
vm_follow_entry(
    vm_map_entry_t entry)
{
    int shadows;

    iprintf("map entry %08X\n", entry);

    shadows = vm_follow_object(entry->object.vm_object);

    iprintf("Total objects : %d\n", shadows);
}

/*
 *  vm_map_entry_print: [ debug ]
 */
void
vm_map_entry_print(
    register vm_map_entry_t entry)
{
    static const char *inheritance_name[4] =
        { "share", "copy", "none", "?"};
    static const char *behavior_name[4] =
        { "dflt", "rand", "seqtl", "rseqntl" };

    iprintf("map entry %08X - prev = %08X  next = %08X\n", entry, entry->vme_prev, entry->vme_next);

    vm_map_links_print(&entry->links);

    iprintf("start = %016llX  end = %016llX - prot=%x/%x/%s\n",
            (unsigned long long)entry->vme_start,
            (unsigned long long)entry->vme_end,
            entry->protection,
            entry->max_protection,
            inheritance_name[(entry->inheritance & 0x3)]);

    iprintf("behavior = %s, wired_count = %d, user_wired_count = %d\n",
            behavior_name[(entry->behavior & 0x3)],
            entry->wired_count,
            entry->user_wired_count);
    iprintf("%sin_transition, %sneeds_wakeup\n",
            (entry->in_transition ? "" : "!"),
            (entry->needs_wakeup ? "" : "!"));

    if (entry->is_sub_map) {
        iprintf("submap = %08X - offset = %016llX\n",
                entry->object.sub_map,
                (unsigned long long)entry->offset);
    } else {
        iprintf("object = %08X  offset = %016llX - ",
                entry->object.vm_object,
                (unsigned long long)entry->offset);
        printf("%sis_shared, %sneeds_copy\n",
               (entry->is_shared ? "" : "!"),
               (entry->needs_copy ? "" : "!"));
    }
}

/*
 *  vm_follow_map:  [ debug ]
 */
void
vm_follow_map(
    vm_map_t map)
{
    register vm_map_entry_t entry;

    iprintf("task map %08X\n", map);

    for (entry = vm_map_first_entry(map);
         entry && entry != vm_map_to_entry(map);
         entry = entry->vme_next) {
        vm_follow_entry(entry);
    }
}

/*
 *  vm_map_print:   [ debug ]
 */
void
vm_map_print(
    db_addr_t inmap)
{
    register vm_map_entry_t entry;
    vm_map_t map;
#if TASK_SWAPPER
    char *swstate;
#endif /* TASK_SWAPPER */

    map = (vm_map_t)(long)
        inmap;  /* Make sure we have the right type */

    iprintf("task map %08X\n", map);

    vm_map_header_print(&map->hdr);

    iprintf("pmap = %08X  size = %08X  ref = %d  hint = %08X  first_free = %08X\n",
            map->pmap,
            map->size,
            map->ref_count,
            map->hint,
            map->first_free);

    iprintf("%swait_for_space, %swiring_required, timestamp = %d\n",
            (map->wait_for_space ? "" : "!"),
            (map->wiring_required ? "" : "!"),
            map->timestamp);

#if TASK_SWAPPER
    switch (map->sw_state) {
    case MAP_SW_IN:
        swstate = "SW_IN";
        break;
    case MAP_SW_OUT:
        swstate = "SW_OUT";
        break;
    default:
        swstate = "????";
        break;
    }
    iprintf("res = %d, sw_state = %s\n", map->res_count, swstate);
#endif /* TASK_SWAPPER */

    for (entry = vm_map_first_entry(map);
         entry && entry != vm_map_to_entry(map);
         entry = entry->vme_next) {
        vm_map_entry_print(entry);
    }
}

/*
 *  Routine:    vm_map_copy_print
 *  Purpose:
 *      Pretty-print a copy object for ddb.
 */
void
vm_map_copy_print(
    db_addr_t incopy)
{
    vm_map_copy_t copy;
    vm_map_entry_t entry;

    copy = (vm_map_copy_t)(long)
        incopy; /* Make sure we have the right type */

    printf("copy object 0x%x\n", copy);

    iprintf("type=%d", copy->type);
    switch (copy->type) {
    case VM_MAP_COPY_ENTRY_LIST:
        printf("[entry_list]");
        break;

    case VM_MAP_COPY_OBJECT:
        printf("[object]");
        break;

    case VM_MAP_COPY_KERNEL_BUFFER:
        printf("[kernel_buffer]");
        break;

    default:
        printf("[bad type]");
        break;
    }
    printf(", offset=0x%llx", (unsigned long long)copy->offset);
    printf(", size=0x%x\n", copy->size);

    switch (copy->type) {
    case VM_MAP_COPY_ENTRY_LIST:
        vm_map_header_print(&copy->cpy_hdr);
        for (entry = vm_map_copy_first_entry(copy);
             entry && entry != vm_map_copy_to_entry(copy);
             entry = entry->vme_next) {
            vm_map_entry_print(entry);
        }
        break;

    case VM_MAP_COPY_OBJECT:
        iprintf("object=0x%x\n", copy->cpy_object);
        break;

    case VM_MAP_COPY_KERNEL_BUFFER:
        iprintf("kernel buffer=0x%x", copy->cpy_kdata);
        printf(", kalloc_size=0x%x\n", copy->cpy_kalloc_size);
        break;
    }
}

/*
 *  db_vm_map_total_size(map)   [ debug ]
 *
 *  return the total virtual size (in bytes) of the map
 */
vm_map_size_t
db_vm_map_total_size(
    db_addr_t inmap)
{
    vm_map_entry_t  entry;
    vm_map_size_t   total;
    vm_map_t        map;

    map = (vm_map_t)(long)
        inmap;  /* Make sure we have the right type */

    total = 0;
    for (entry = vm_map_first_entry(map);
         entry != vm_map_to_entry(map);
         entry = entry->vme_next) {
        total += entry->vme_end - entry->vme_start;
    }

    return total;
}

#endif /* MACH_KDB */
/*
 *  Routine:    vm_map_entry_insert
 *
 *  Description:    This routine inserts a new vm_entry in a locked map.
 */
vm_map_entry_t
vm_map_entry_insert(
    vm_map_t            map,
    vm_map_entry_t      insp_entry,
    vm_map_offset_t     start,
    vm_map_offset_t     end,
    vm_object_t         object,
    vm_object_offset_t  offset,
    boolean_t           needs_copy,
    boolean_t           is_shared,
    boolean_t           in_transition,
    vm_prot_t           cur_protection,
    vm_prot_t           max_protection,
    vm_behavior_t       behavior,
    vm_inherit_t        inheritance,
    unsigned            wired_count,
    boolean_t           no_cache,
    boolean_t           permanent,
    unsigned int        superpage_size)
{
    vm_map_entry_t  new_entry;

    assert(insp_entry != (vm_map_entry_t)0);

    new_entry = vm_map_entry_create(map);

    new_entry->vme_start = start;
    new_entry->vme_end = end;
    assert(page_aligned(new_entry->vme_start));
    assert(page_aligned(new_entry->vme_end));

    new_entry->object.vm_object = object;
    new_entry->offset = offset;
    new_entry->is_shared = is_shared;
    new_entry->is_sub_map = FALSE;
    new_entry->needs_copy = needs_copy;
    new_entry->in_transition = in_transition;
    new_entry->needs_wakeup = FALSE;
    new_entry->inheritance = inheritance;
    new_entry->protection = cur_protection;
    new_entry->max_protection = max_protection;
    new_entry->behavior = behavior;
    new_entry->wired_count = wired_count;
    new_entry->user_wired_count = 0;
    new_entry->use_pmap = FALSE;
    new_entry->alias = 0;
    new_entry->zero_wired_pages = FALSE;
    new_entry->no_cache = no_cache;
    new_entry->permanent = permanent;
    new_entry->superpage_size = superpage_size;

    /*
     *  Insert the new entry into the list.
     */

    vm_map_entry_link(map, insp_entry, new_entry);
    map->size += end - start;

    /*
     *  Update the free space hint and the lookup hint.
     */

    SAVE_HINT_MAP_WRITE(map, new_entry);
    return new_entry;
}
/*
 *  Routine:    vm_map_remap_extract
 *
 *  Description:    This routine returns a vm_entry list from a map.
 */
static kern_return_t
vm_map_remap_extract(
    vm_map_t                map,
    vm_map_offset_t         addr,
    vm_map_size_t           size,
    boolean_t               copy,
    struct vm_map_header    *map_header,
    vm_prot_t               *cur_protection,
    vm_prot_t               *max_protection,
    /* What, no behavior? */
    vm_inherit_t            inheritance,
    boolean_t               pageable)
{
    kern_return_t       result;
    vm_map_size_t       mapped_size;
    vm_map_size_t       tmp_size;
    vm_map_entry_t      src_entry;     /* result of last map lookup */
    vm_map_entry_t      new_entry;
    vm_object_offset_t  offset;
    vm_map_offset_t     map_address;
    vm_map_offset_t     src_start;     /* start of entry to map */
    vm_map_offset_t     src_end;       /* end of region to be mapped */
    vm_object_t         object;
    vm_map_version_t    version;
    boolean_t           src_needs_copy;
    boolean_t           new_entry_needs_copy;

    assert(map != VM_MAP_NULL);
    assert(size != 0 && size == vm_map_round_page(size));
    assert(inheritance == VM_INHERIT_NONE ||
           inheritance == VM_INHERIT_COPY ||
           inheritance == VM_INHERIT_SHARE);

    /*
     *  Compute start and end of region.
     */
    src_start = vm_map_trunc_page(addr);
    src_end = vm_map_round_page(src_start + size);

    /*
     *  Initialize map_header.
     */
    map_header->links.next = (struct vm_map_entry *)&map_header->links;
    map_header->links.prev = (struct vm_map_entry *)&map_header->links;
    map_header->nentries = 0;
    map_header->entries_pageable = pageable;

    *cur_protection = VM_PROT_ALL;
    *max_protection = VM_PROT_ALL;

    map_address = 0;
    mapped_size = 0;
    result = KERN_SUCCESS;

    /*
     *  The specified source virtual space might correspond to
     *  multiple map entries, need to loop on them.
     */
    vm_map_lock(map);
    while (mapped_size != size) {
        vm_map_size_t   entry_size;

        /*
         *  Find the beginning of the region.
         */
        if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
            result = KERN_INVALID_ADDRESS;
            break;
        }

        if (src_start < src_entry->vme_start ||
            (mapped_size && src_start != src_entry->vme_start)) {
            result = KERN_INVALID_ADDRESS;
            break;
        }

        tmp_size = size - mapped_size;
        if (src_end > src_entry->vme_end)
            tmp_size -= (src_end - src_entry->vme_end);

        entry_size = (vm_map_size_t)(src_entry->vme_end -
                                     src_entry->vme_start);

        if(src_entry->is_sub_map) {
            vm_map_reference(src_entry->object.sub_map);
            object = VM_OBJECT_NULL;
        } else {
            object = src_entry->object.vm_object;

            if (object == VM_OBJECT_NULL) {
                object = vm_object_allocate(entry_size);
                src_entry->offset = 0;
                src_entry->object.vm_object = object;
            } else if (object->copy_strategy !=
                       MEMORY_OBJECT_COPY_SYMMETRIC) {
                /*
                 *  We are already using an asymmetric
                 *  copy, and therefore we already have
                 *  the right object.
                 */
                assert(!src_entry->needs_copy);
            } else if (src_entry->needs_copy || object->shadowed ||
                       (object->internal && !object->true_share &&
                        !src_entry->is_shared &&
                        object->size > entry_size)) {

                vm_object_shadow(&src_entry->object.vm_object,
                                 &src_entry->offset,
                                 entry_size);

                if (!src_entry->needs_copy &&
                    (src_entry->protection & VM_PROT_WRITE)) {
                    vm_prot_t prot;

                    prot = src_entry->protection & ~VM_PROT_WRITE;

                    if (override_nx(map, src_entry->alias) && prot)
                        prot |= VM_PROT_EXECUTE;

                    if (map->mapped) {
                        vm_object_pmap_protect(
                            src_entry->object.vm_object,
                            src_entry->offset,
                            entry_size,
                            PMAP_NULL,
                            src_entry->vme_start,
                            prot);
                    } else {
                        pmap_protect(vm_map_pmap(map),
                                     src_entry->vme_start,
                                     src_entry->vme_end,
                                     prot);
                    }
                }

                object = src_entry->object.vm_object;
                src_entry->needs_copy = FALSE;
            }

            vm_object_lock(object);
            vm_object_reference_locked(object); /* object ref. for new entry */
            if (object->copy_strategy ==
                MEMORY_OBJECT_COPY_SYMMETRIC) {
                object->copy_strategy =
                    MEMORY_OBJECT_COPY_DELAY;
            }
            vm_object_unlock(object);
        }

        offset = src_entry->offset + (src_start - src_entry->vme_start);

        new_entry = _vm_map_entry_create(map_header);
        vm_map_entry_copy(new_entry, src_entry);
        new_entry->use_pmap = FALSE; /* clr address space specifics */

        new_entry->vme_start = map_address;
        new_entry->vme_end = map_address + tmp_size;
        new_entry->inheritance = inheritance;
        new_entry->offset = offset;

        /*
         * The new region has to be copied now if required.
         */
    RestartCopy:
        if (!copy) {
            src_entry->is_shared = TRUE;
            new_entry->is_shared = TRUE;
            if (!(new_entry->is_sub_map))
                new_entry->needs_copy = FALSE;

        } else if (src_entry->is_sub_map) {
            /* make this a COW sub_map if not already */
            new_entry->needs_copy = TRUE;
            object = VM_OBJECT_NULL;
        } else if (src_entry->wired_count == 0 &&
                   vm_object_copy_quickly(&new_entry->object.vm_object,
                                          offset,
                                          (new_entry->vme_end -
                                           new_entry->vme_start),
                                          &src_needs_copy,
                                          &new_entry_needs_copy)) {

            new_entry->needs_copy = new_entry_needs_copy;
            new_entry->is_shared = FALSE;

            /*
             * Handle copy_on_write semantics.
             */
            if (src_needs_copy && !src_entry->needs_copy) {
                vm_prot_t prot;

                prot = src_entry->protection & ~VM_PROT_WRITE;

                if (override_nx(map, src_entry->alias) && prot)
                    prot |= VM_PROT_EXECUTE;

                vm_object_pmap_protect(object,
                                       offset,
                                       entry_size,
                                       ((src_entry->is_shared
                                         || map->mapped) ?
                                        PMAP_NULL : map->pmap),
                                       src_entry->vme_start,
                                       prot);

                src_entry->needs_copy = TRUE;
            }
            /*
             * Throw away the old object reference of the new entry.
             */
            vm_object_deallocate(object);

        } else {    /* slow copy */
            new_entry->is_shared = FALSE;

            /*
             * The map can be safely unlocked since we
             * already hold a reference on the object.
             *
             * Record the timestamp of the map for later
             * verification, and unlock the map.
             */
            version.main_timestamp = map->timestamp;
            vm_map_unlock(map);     /* Increments timestamp once! */

            /*
             * Perform the copy.
             */
            if (src_entry->wired_count > 0) {
                vm_object_lock(object);
                result = vm_object_copy_slowly(
                    object,
                    offset,
                    entry_size,
                    THREAD_UNINT,
                    &new_entry->object.vm_object);

                new_entry->offset = 0;
                new_entry->needs_copy = FALSE;
            } else {
                result = vm_object_copy_strategically(
                    object,
                    offset,
                    entry_size,
                    &new_entry->object.vm_object,
                    &new_entry->offset,
                    &new_entry_needs_copy);

                new_entry->needs_copy = new_entry_needs_copy;
            }

            /*
             * Throw away the old object reference of the new entry.
             */
            vm_object_deallocate(object);

            if (result != KERN_SUCCESS &&
                result != KERN_MEMORY_RESTART_COPY) {
                _vm_map_entry_dispose(map_header, new_entry);
                break;
            }

            /*
             * Verify that the map has not substantially
             * changed while the copy was being made.
             */

            vm_map_lock(map);
            if (version.main_timestamp + 1 != map->timestamp) {
                /*
                 * Simple version comparison failed.
                 *
                 * Retry the lookup and verify that the
                 * same object/offset are still present.
                 */
                vm_object_deallocate(new_entry->
                                     object.vm_object);
                _vm_map_entry_dispose(map_header, new_entry);
                if (result == KERN_MEMORY_RESTART_COPY)
                    result = KERN_SUCCESS;
                continue;
            }

            if (result == KERN_MEMORY_RESTART_COPY) {
                vm_object_reference(object);
                goto RestartCopy;
            }
        }

        _vm_map_entry_link(map_header,
                           map_header->links.prev, new_entry);

        *cur_protection &= src_entry->protection;
        *max_protection &= src_entry->max_protection;

        map_address += tmp_size;
        mapped_size += tmp_size;
        src_start += tmp_size;

    } /* end while */

    vm_map_unlock(map);
    if (result != KERN_SUCCESS) {
        /*
         * Free all allocated elements.
         */
        for (src_entry = map_header->links.next;
             src_entry != (struct vm_map_entry *)&map_header->links;
             src_entry = new_entry) {
            new_entry = src_entry->vme_next;
            _vm_map_entry_unlink(map_header, src_entry);
            vm_object_deallocate(src_entry->object.vm_object);
            _vm_map_entry_dispose(map_header, src_entry);
        }
    }
    return result;
}
/*
 *  Routine:    vm_remap
 *
 *      Map portion of a task's address space.
 *      Mapped region must not overlap more than
 *      one vm memory object. Protections and
 *      inheritance attributes remain the same
 *      as in the original task and are out parameters.
 *      Source and Target task can be identical
 *      Other attributes are identical as for vm_map()
 */
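/*
 * Illustrative sketch (not part of this file): user space reaches this
 * routine through mach_vm_remap().  Assuming only the public Mach headers,
 * creating a shared alias of a region within the caller's own task might
 * look like this; the helper name is hypothetical.
 */
#if 0   /* user-space example, for illustration only */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
alias_region(mach_vm_address_t src, mach_vm_size_t size,
             mach_vm_address_t *alias_out)
{
    vm_prot_t cur_prot, max_prot;
    mach_vm_address_t target = 0;

    /* copy == FALSE shares the source pages instead of copying them. */
    kern_return_t kr = mach_vm_remap(mach_task_self(), &target, size, 0,
                                     VM_FLAGS_ANYWHERE,
                                     mach_task_self(), src, FALSE,
                                     &cur_prot, &max_prot,
                                     VM_INHERIT_SHARE);
    if (kr == KERN_SUCCESS)
        *alias_out = target;
    return kr;
}
#endif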
kern_return_t
vm_map_remap(
    vm_map_t            target_map,
    vm_map_address_t    *address,
    vm_map_size_t       size,
    vm_map_offset_t     mask,
    int                 flags,
    vm_map_t            src_map,
    vm_map_offset_t     memory_address,
    boolean_t           copy,
    vm_prot_t           *cur_protection,
    vm_prot_t           *max_protection,
    vm_inherit_t        inheritance)
{
    kern_return_t           result;
    vm_map_entry_t          entry;
    vm_map_entry_t          insp_entry = VM_MAP_ENTRY_NULL;
    vm_map_entry_t          new_entry;
    struct vm_map_header    map_header;

    if (target_map == VM_MAP_NULL)
        return KERN_INVALID_ARGUMENT;

    switch (inheritance) {
    case VM_INHERIT_NONE:
    case VM_INHERIT_COPY:
    case VM_INHERIT_SHARE:
        if (size != 0 && src_map != VM_MAP_NULL)
            break;
        /*FALL THRU*/
    default:
        return KERN_INVALID_ARGUMENT;
    }

    size = vm_map_round_page(size);

    result = vm_map_remap_extract(src_map, memory_address,
                                  size, copy, &map_header,
                                  cur_protection,
                                  max_protection,
                                  inheritance,
                                  target_map->hdr.entries_pageable);

    if (result != KERN_SUCCESS) {
        return result;
    }

    /*
     * Allocate/check a range of free virtual address
     * space for the target
     */
    *address = vm_map_trunc_page(*address);
    vm_map_lock(target_map);
    result = vm_map_remap_range_allocate(target_map, address, size,
                                         mask, flags, &insp_entry);

    for (entry = map_header.links.next;
         entry != (struct vm_map_entry *)&map_header.links;
         entry = new_entry) {
        new_entry = entry->vme_next;
        _vm_map_entry_unlink(&map_header, entry);
        if (result == KERN_SUCCESS) {
            entry->vme_start += *address;
            entry->vme_end += *address;
            vm_map_entry_link(target_map, insp_entry, entry);
            insp_entry = entry;
        } else {
            if (!entry->is_sub_map) {
                vm_object_deallocate(entry->object.vm_object);
            } else {
                vm_map_deallocate(entry->object.sub_map);
            }
            _vm_map_entry_dispose(&map_header, entry);
        }
    }

    if (result == KERN_SUCCESS) {
        target_map->size += size;
        SAVE_HINT_MAP_WRITE(target_map, insp_entry);
    }
    vm_map_unlock(target_map);

    if (result == KERN_SUCCESS && target_map->wiring_required)
        result = vm_map_wire(target_map, *address,
                             *address + size, *cur_protection, TRUE);
    return result;
}
/*
 *  Routine:    vm_map_remap_range_allocate
 *
 *  Description:
 *      Allocate a range in the specified virtual address map.
 *      returns the address and the map entry just before the allocated
 *      range
 *
 *  Map must be locked.
 */

static kern_return_t
vm_map_remap_range_allocate(
    vm_map_t            map,
    vm_map_address_t    *address,   /* IN/OUT */
    vm_map_size_t       size,
    vm_map_offset_t     mask,
    int                 flags,
    vm_map_entry_t      *map_entry) /* OUT */
{
    vm_map_entry_t  entry;
    vm_map_offset_t start;
    vm_map_offset_t end;
    kern_return_t   kr;

StartAgain: ;

    start = *address;

    if (flags & VM_FLAGS_ANYWHERE)
    {
        /*
         *  Calculate the first possible address.
         */

        if (start < map->min_offset)
            start = map->min_offset;
        if (start > map->max_offset)
            return(KERN_NO_SPACE);

        /*
         *  Look for the first possible address;
         *  if there's already something at this
         *  address, we have to start after it.
         */

        assert(first_free_is_valid(map));
        if (start == map->min_offset) {
            if ((entry = map->first_free) != vm_map_to_entry(map))
                start = entry->vme_end;
        } else {
            vm_map_entry_t tmp_entry;
            if (vm_map_lookup_entry(map, start, &tmp_entry))
                start = tmp_entry->vme_end;
            entry = tmp_entry;
        }

        /*
         *  In any case, the "entry" always precedes
         *  the proposed new region throughout the
         *  loop:
         */

        while (TRUE) {
            register vm_map_entry_t next;

            /*
             *  Find the end of the proposed new region.
             *  Be sure we didn't go beyond the end, or
             *  wrap around the address.
             */

            end = ((start + mask) & ~mask);
            if (end < start)
                return(KERN_NO_SPACE);
            start = end;
            end += size;

            if ((end > map->max_offset) || (end < start)) {
                if (map->wait_for_space) {
                    if (size <= (map->max_offset -
                                 map->min_offset)) {
                        assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
                        vm_map_unlock(map);
                        thread_block(THREAD_CONTINUE_NULL);
                        vm_map_lock(map);
                        goto StartAgain;
                    }
                }

                return(KERN_NO_SPACE);
            }

            /*
             *  If there are no more entries, we must win.
             */

            next = entry->vme_next;
            if (next == vm_map_to_entry(map))
                break;

            /*
             *  If there is another entry, it must be
             *  after the end of the potential new region.
             */

            if (next->vme_start >= end)
                break;

            /*
             *  Didn't fit -- move to the next entry.
             */

            entry = next;
            start = entry->vme_end;
        }
        *address = start;
    } else {
        vm_map_entry_t temp_entry;

        /*
         *  Verify that:
         *      the address doesn't itself violate
         *      the mask requirement.
         */

        if ((start & mask) != 0)
            return(KERN_NO_SPACE);


        /*
         *  ...     the address is within bounds
         */

        end = start + size;

        if ((start < map->min_offset) ||
            (end > map->max_offset) ||
            (start >= end)) {
            return(KERN_INVALID_ADDRESS);
        }

        /*
         * If we're asked to overwrite whatever was mapped in that
         * range, first deallocate that range.
         */
        if (flags & VM_FLAGS_OVERWRITE) {
            vm_map_t zap_map;

            /*
             * We use a "zap_map" to avoid having to unlock
             * the "map" in vm_map_delete(), which would compromise
             * the atomicity of the "deallocate" and then "remap"
             * operations.
             */
            zap_map = vm_map_create(PMAP_NULL,
                                    start,
                                    end,
                                    map->hdr.entries_pageable);
            if (zap_map == VM_MAP_NULL) {
                return KERN_RESOURCE_SHORTAGE;
            }

            kr = vm_map_delete(map, start, end,
                               VM_MAP_REMOVE_SAVE_ENTRIES,
                               zap_map);
            if (kr == KERN_SUCCESS) {
                vm_map_destroy(zap_map,
                               VM_MAP_REMOVE_NO_PMAP_CLEANUP);
                zap_map = VM_MAP_NULL;
            }
        }

        /*
         *  ...     the starting address isn't allocated
         */

        if (vm_map_lookup_entry(map, start, &temp_entry))
            return(KERN_NO_SPACE);

        entry = temp_entry;

        /*
         *  ...     the next region doesn't overlap the
         *          end point.
         */

        if ((entry->vme_next != vm_map_to_entry(map)) &&
            (entry->vme_next->vme_start < end))
            return(KERN_NO_SPACE);
    }
    *map_entry = entry;
    return(KERN_SUCCESS);
}
/*
 *  Routine:    vm_map_switch
 *
 *  Description:
 *      Set the address map for the current thread to the specified map
 */

vm_map_t
vm_map_switch(
    vm_map_t map)
{
    int         mycpu;
    thread_t    thread = current_thread();
    vm_map_t    oldmap = thread->map;

    mp_disable_preemption();
    mycpu = cpu_number();

    /*
     *  Deactivate the current map and activate the requested map
     */
    PMAP_SWITCH_USER(thread, map, mycpu);

    mp_enable_preemption();
    return(oldmap);
}
/*
 *  Routine:    vm_map_write_user
 *
 *  Description:
 *      Copy out data from a kernel space into space in the
 *      destination map. The space must already exist in the
 *      destination map.
 *      NOTE:  This routine should only be called by threads
 *      which can block on a page fault. i.e. kernel mode user
 *      threads.
 *
 */
kern_return_t
vm_map_write_user(
    vm_map_t            map,
    void                *src_p,
    vm_map_address_t    dst_addr,
    vm_size_t           size)
{
    kern_return_t kr = KERN_SUCCESS;

    if(current_map() == map) {
        if (copyout(src_p, dst_addr, size)) {
            kr = KERN_INVALID_ADDRESS;
        }
    } else {
        vm_map_t oldmap;

        /* take on the identity of the target map while doing */
        /* the transfer */

        vm_map_reference(map);
        oldmap = vm_map_switch(map);
        if (copyout(src_p, dst_addr, size)) {
            kr = KERN_INVALID_ADDRESS;
        }
        vm_map_switch(oldmap);
        vm_map_deallocate(map);
    }
    return kr;
}
/*
 *  Routine:    vm_map_read_user
 *
 *  Description:
 *      Copy in data from a user space source map into the
 *      kernel map. The space must already exist in the
 *      kernel map.
 *      NOTE:  This routine should only be called by threads
 *      which can block on a page fault. i.e. kernel mode user
 *      threads.
 *
 */
kern_return_t
vm_map_read_user(
    vm_map_t            map,
    vm_map_address_t    src_addr,
    void                *dst_p,
    vm_size_t           size)
{
    kern_return_t kr = KERN_SUCCESS;

    if(current_map() == map) {
        if (copyin(src_addr, dst_p, size)) {
            kr = KERN_INVALID_ADDRESS;
        }
    } else {
        vm_map_t oldmap;

        /* take on the identity of the target map while doing */
        /* the transfer */

        vm_map_reference(map);
        oldmap = vm_map_switch(map);
        if (copyin(src_addr, dst_p, size)) {
            kr = KERN_INVALID_ADDRESS;
        }
        vm_map_switch(oldmap);
        vm_map_deallocate(map);
    }
    return kr;
}
/*
 *  vm_map_check_protection:
 *
 *  Assert that the target map allows the specified
 *  privilege on the entire address region given.
 *  The entire region must be allocated.
 */
boolean_t
vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
                        vm_map_offset_t end, vm_prot_t protection)
{
    vm_map_entry_t entry;
    vm_map_entry_t tmp_entry;

    vm_map_lock(map);

    if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
    {
        vm_map_unlock(map);
        return (FALSE);
    }

    if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
        vm_map_unlock(map);
        return(FALSE);
    }

    entry = tmp_entry;

    while (start < end) {
        if (entry == vm_map_to_entry(map)) {
            vm_map_unlock(map);
            return(FALSE);
        }

        /*
         *  No holes allowed!
         */

        if (start < entry->vme_start) {
            vm_map_unlock(map);
            return(FALSE);
        }

        /*
         * Check protection associated with entry.
         */

        if ((entry->protection & protection) != protection) {
            vm_map_unlock(map);
            return(FALSE);
        }

        /* go to next entry */

        start = entry->vme_end;
        entry = entry->vme_next;
    }
    vm_map_unlock(map);
    return(TRUE);
}
kern_return_t
vm_map_purgable_control(
    vm_map_t        map,
    vm_map_offset_t address,
    vm_purgable_t   control,
    int             *state)
{
    vm_map_entry_t  entry;
    vm_object_t     object;
    kern_return_t   kr;

    /*
     * Vet all the input parameters and current type and state of the
     * underlying object.  Return with an error if anything is amiss.
     */
    if (map == VM_MAP_NULL)
        return(KERN_INVALID_ARGUMENT);

    if (control != VM_PURGABLE_SET_STATE &&
        control != VM_PURGABLE_GET_STATE &&
        control != VM_PURGABLE_PURGE_ALL)
        return(KERN_INVALID_ARGUMENT);

    if (control == VM_PURGABLE_PURGE_ALL) {
        vm_purgeable_object_purge_all();
        return KERN_SUCCESS;
    }

    if (control == VM_PURGABLE_SET_STATE &&
        (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
         ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
        return(KERN_INVALID_ARGUMENT);

    vm_map_lock_read(map);

    if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {

        /*
         * Must pass a valid non-submap address.
         */
        vm_map_unlock_read(map);
        return(KERN_INVALID_ADDRESS);
    }

    if ((entry->protection & VM_PROT_WRITE) == 0) {
        /*
         * Can't apply purgable controls to something you can't write.
         */
        vm_map_unlock_read(map);
        return(KERN_PROTECTION_FAILURE);
    }

    object = entry->object.vm_object;
    if (object == VM_OBJECT_NULL) {
        /*
         * Object must already be present or it can't be purgable.
         */
        vm_map_unlock_read(map);
        return KERN_INVALID_ARGUMENT;
    }

    vm_object_lock(object);

    if (entry->offset != 0 ||
        entry->vme_end - entry->vme_start != object->size) {
        /*
         * Can only apply purgable controls to the whole (existing)
         * object at once.
         */
        vm_map_unlock_read(map);
        vm_object_unlock(object);
        return KERN_INVALID_ARGUMENT;
    }

    vm_map_unlock_read(map);

    kr = vm_object_purgable_control(object, control, state);

    vm_object_unlock(object);

    return kr;
}
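/*
 * Illustrative sketch (not part of this file): purgeable state is driven from
 * user space through vm_purgable_control() on memory that was allocated with
 * VM_FLAGS_PURGABLE.  The helper name is hypothetical, and the user-level
 * call names/constants are assumed to be as published in the Mach headers.
 */
#if 0   /* user-space example, for illustration only */
#include <mach/mach.h>
#include <mach/mach_vm.h>

static kern_return_t
make_volatile(mach_vm_address_t *addr, mach_vm_size_t size)
{
    kern_return_t kr;
    int state;

    /* Purgeable memory must be allocated with VM_FLAGS_PURGABLE. */
    kr = mach_vm_allocate(mach_task_self(), addr, size,
                          VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE);
    if (kr != KERN_SUCCESS)
        return kr;

    /* Mark it volatile: the kernel may now empty it under memory pressure. */
    state = VM_PURGABLE_VOLATILE;
    return vm_purgable_control(mach_task_self(), (vm_address_t)*addr,
                               VM_PURGABLE_SET_STATE, &state);
}
#endif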
kern_return_t
vm_map_page_query_internal(
    vm_map_t        target_map,
    vm_map_offset_t offset,
    int             *disposition,
    int             *ref_count)
{
    kern_return_t               kr;
    vm_page_info_basic_data_t   info;
    mach_msg_type_number_t      count;

    count = VM_PAGE_INFO_BASIC_COUNT;
    kr = vm_map_page_info(target_map,
                          offset,
                          VM_PAGE_INFO_BASIC,
                          (vm_page_info_t) &info,
                          &count);
    if (kr == KERN_SUCCESS) {
        *disposition = info.disposition;
        *ref_count = info.ref_count;
    } else {
        *disposition = 0;
        *ref_count = 0;
    }

    return kr;
}

kern_return_t
vm_map_page_info(
    vm_map_t                map,
    vm_map_offset_t         offset,
    vm_page_info_flavor_t   flavor,
    vm_page_info_t          info,
    mach_msg_type_number_t  *count)
{
    vm_map_entry_t          map_entry;
    vm_object_t             object;
    vm_page_t               m;
    kern_return_t           kr;
    kern_return_t           retval = KERN_SUCCESS;
    boolean_t               top_object;
    int                     disposition;
    int                     ref_count;
    vm_object_id_t          object_id;
    vm_page_info_basic_t    basic_info;
    int                     depth;

    switch (flavor) {
    case VM_PAGE_INFO_BASIC:
        if (*count != VM_PAGE_INFO_BASIC_COUNT) {
            return KERN_INVALID_ARGUMENT;
        }
        break;
    default:
        return KERN_INVALID_ARGUMENT;
    }

    disposition = 0;
    ref_count = 0;
    object_id = 0;
    top_object = TRUE;
    depth = 0;

    retval = KERN_SUCCESS;
    offset = vm_map_trunc_page(offset);

    vm_map_lock_read(map);

retry_lookup:
    /*
     * First, find the map entry covering "offset", going down
     * submaps if necessary.
     */
    if (!vm_map_lookup_entry(map, offset, &map_entry)) {
        vm_map_unlock_read(map);
        return KERN_INVALID_ADDRESS;
    }
    /* compute offset from this map entry's start */
    offset -= map_entry->vme_start;
    /* compute offset into this map entry's object (or submap) */
    offset += map_entry->offset;

    if (map_entry->is_sub_map) {
        vm_map_t sub_map;

        sub_map = map_entry->object.sub_map;
        vm_map_lock_read(sub_map);
        vm_map_unlock_read(map);

        map = sub_map;

        ref_count = MAX(ref_count, map->ref_count);
        goto retry_lookup;
    }

    object = map_entry->object.vm_object;
    if (object == VM_OBJECT_NULL) {
        /* no object -> no page */
        vm_map_unlock_read(map);
        goto done;
    }

    vm_object_lock(object);
    vm_map_unlock_read(map);

    /*
     * Go down the VM object shadow chain until we find the page
     * we're looking for.
     */
    for (;;) {
        ref_count = MAX(ref_count, object->ref_count);

        m = vm_page_lookup(object, offset);

        if (m != VM_PAGE_NULL) {
            disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
            break;
        } else {
#if MACH_PAGEMAP
            if (object->existence_map) {
                if (vm_external_state_get(object->existence_map,
                                          offset) ==
                    VM_EXTERNAL_STATE_EXISTS) {
                    /*
                     * this page has been paged out
                     */
                    disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
                    break;
                }
            } else
#endif /* MACH_PAGEMAP */
            if (object->internal &&
                object->alive &&
                !object->terminating &&
                object->pager_ready) {

                memory_object_t pager;

                vm_object_paging_begin(object);
                pager = object->pager;
                vm_object_unlock(object);

                /*
                 * Ask the default pager if
                 * it has this page.
                 */
                kr = memory_object_data_request(
                    pager,
                    offset + object->paging_offset,
                    0, /* just poke the pager */
                    VM_PROT_READ,
                    NULL);

                vm_object_lock(object);
                vm_object_paging_end(object);

                if (kr == KERN_SUCCESS) {
                    /* the default pager has it */
                    disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
                    break;
                }
            }

            if (object->shadow != VM_OBJECT_NULL) {
                vm_object_t shadow;

                offset += object->shadow_offset;
                shadow = object->shadow;

                vm_object_lock(shadow);
                vm_object_unlock(object);

                object = shadow;
                top_object = FALSE;
                depth++;
            } else {
//              if (!object->internal)
//                  break;
//              retval = KERN_FAILURE;
//              goto done_with_object;
                break;
            }
        }
    }
    /* The ref_count is not strictly accurate, it measures the number   */
    /* of entities holding a ref on the object, they may not be mapping */
    /* the object or may not be mapping the section holding the         */
    /* target page but its still a ball park number and though an over- */
    /* count, it picks up the copy-on-write cases                       */

    /* We could also get a picture of page sharing from pmap_attributes */
    /* but this would under count as only faulted-in mappings would     */
    /* show up.                                                         */

    if (top_object == TRUE && object->shadow)
        disposition |= VM_PAGE_QUERY_PAGE_COPIED;

    if (! object->internal)
        disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;

    if (m == VM_PAGE_NULL)
        goto done_with_object;

    if (m->fictitious) {
        disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
        goto done_with_object;
    }
    if (m->dirty || pmap_is_modified(m->phys_page))
        disposition |= VM_PAGE_QUERY_PAGE_DIRTY;

    if (m->reference || pmap_is_referenced(m->phys_page))
        disposition |= VM_PAGE_QUERY_PAGE_REF;

    if (m->speculative)
        disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;

    if (m->cs_validated)
        disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
    if (m->cs_tainted)
        disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;

done_with_object:
    vm_object_unlock(object);
done:

    switch (flavor) {
    case VM_PAGE_INFO_BASIC:
        basic_info = (vm_page_info_basic_t) info;
        basic_info->disposition = disposition;
        basic_info->ref_count = ref_count;
        basic_info->object_id = (vm_object_id_t) (uintptr_t) object;
        basic_info->offset = (memory_object_offset_t) offset;
        basic_info->depth = depth;
        break;
    }

    return retval;
}
/*
 *  Routine:    vm_map_msync
 *
 *  Description:
 *      Synchronises the memory range specified with its backing store
 *      image by either flushing or cleaning the contents to the appropriate
 *      memory manager engaging in a memory object synchronize dialog with
 *      the manager.  The client doesn't return until the manager issues
 *      m_o_s_completed message.  MIG Magically converts user task parameter
 *      to the task's address map.
 *
 *  interpretation of sync_flags
 *      VM_SYNC_INVALIDATE  - discard pages, only return precious
 *                            pages to manager.
 *
 *      VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
 *                          - discard pages, write dirty or precious
 *                            pages back to memory manager.
 *
 *      VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
 *                          - write dirty or precious pages back to
 *                            the memory manager.
 *
 *      VM_SYNC_CONTIGUOUS  - does everything normally, but if there
 *                            is a hole in the region, and we would
 *                            have returned KERN_SUCCESS, return
 *                            KERN_INVALID_ADDRESS instead.
 *
 *  NOTE
 *      The memory object attributes have not yet been implemented, this
 *      function will have to deal with the invalidate attribute
 *
 *  RETURNS
 *      KERN_INVALID_TASK       Bad task parameter
 *      KERN_INVALID_ARGUMENT   both sync and async were specified.
 *      KERN_SUCCESS            The usual.
 *      KERN_INVALID_ADDRESS    There was a hole in the region.
 */
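/*
 * Illustrative sketch (not part of this file): the most common entry point
 * into this routine is the POSIX msync(2) call, whose MS_* flags the BSD
 * layer maps onto the VM_SYNC_* flags described above.  The helper name is
 * hypothetical.
 */
#if 0   /* user-space example, for illustration only */
#include <sys/mman.h>

static int
flush_mapping(void *addr, size_t len)
{
    /*
     * MS_SYNC waits for the write-back to complete; MS_INVALIDATE also
     * discards cached pages for the range.
     */
    return msync(addr, len, MS_SYNC | MS_INVALIDATE);
}
#endif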
kern_return_t
vm_map_msync(
	vm_map_t		map,
	vm_map_address_t	address,
	vm_map_size_t		size,
	vm_sync_t		sync_flags)
{
	msync_req_t		msr;
	msync_req_t		new_msr;
	queue_chain_t		req_q;		/* queue of requests for this msync */
	vm_map_entry_t		entry;
	vm_map_size_t		amount_left;
	vm_object_offset_t	offset;
	boolean_t		do_sync_req;
	boolean_t		had_hole = FALSE;
	memory_object_t		pager;

	if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
	    (sync_flags & VM_SYNC_SYNCHRONOUS))
		return(KERN_INVALID_ARGUMENT);

	/*
	 * align address and size on page boundaries
	 */
	size = vm_map_round_page(address + size) - vm_map_trunc_page(address);
	address = vm_map_trunc_page(address);

	if (map == VM_MAP_NULL)
		return(KERN_INVALID_TASK);

	if (size == 0)
		return(KERN_SUCCESS);

	queue_init(&req_q);
	amount_left = size;

	while (amount_left > 0) {
		vm_object_size_t	flush_size;
		vm_object_t		object;

		vm_map_lock(map);
		if (!vm_map_lookup_entry(map,
					 vm_map_trunc_page(address), &entry)) {

			vm_map_size_t	skip;

			/*
			 * hole in the address map.
			 */
			had_hole = TRUE;

			/*
			 * Check for empty map.
			 */
			if (entry == vm_map_to_entry(map) &&
			    entry->vme_next == entry) {
				vm_map_unlock(map);
				break;
			}
			/*
			 * Check that we don't wrap and that
			 * we have at least one real map entry.
			 */
			if ((map->hdr.nentries == 0) ||
			    (entry->vme_next->vme_start < address)) {
				vm_map_unlock(map);
				break;
			}
			/*
			 * Move up to the next entry if needed
			 */
			skip = (entry->vme_next->vme_start - address);
			if (skip >= amount_left)
				amount_left = 0;
			else
				amount_left -= skip;
			address = entry->vme_next->vme_start;
			vm_map_unlock(map);
			continue;
		}

		offset = address - entry->vme_start;

		/*
		 * do we have more to flush than is contained in this
		 * entry ?
		 */
		if (amount_left + entry->vme_start + offset > entry->vme_end) {
			flush_size = entry->vme_end -
				(entry->vme_start + offset);
		} else {
			flush_size = amount_left;
		}
		amount_left -= flush_size;
		address += flush_size;

		if (entry->is_sub_map == TRUE) {
			vm_map_t	local_map;
			vm_map_offset_t	local_offset;

			local_map = entry->object.sub_map;
			local_offset = entry->offset;
			vm_map_unlock(map);
			if (vm_map_msync(
				    local_map,
				    local_offset,
				    flush_size,
				    sync_flags) == KERN_INVALID_ADDRESS) {
				had_hole = TRUE;
			}
			continue;
		}
		object = entry->object.vm_object;

		/*
		 * We can't sync this object if the object has not been
		 * created yet
		 */
		if (object == VM_OBJECT_NULL) {
			vm_map_unlock(map);
			continue;
		}
		offset += entry->offset;

		vm_object_lock(object);

		if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
			int kill_pages = 0;
			boolean_t reusable_pages = FALSE;

			if (sync_flags & VM_SYNC_KILLPAGES) {
				if (object->ref_count == 1 && !object->shadow)
					kill_pages = 1;
				else
					kill_pages = -1;
			}
			if (kill_pages != -1)
				vm_object_deactivate_pages(object, offset,
							   (vm_object_size_t)flush_size, kill_pages, reusable_pages);
			vm_object_unlock(object);
			vm_map_unlock(map);
			continue;
		}
		/*
		 * We can't sync this object if there isn't a pager.
		 * Don't bother to sync internal objects, since there can't
		 * be any "permanent" storage for these objects anyway.
		 */
		if ((object->pager == MEMORY_OBJECT_NULL) ||
		    (object->internal) || (object->private)) {
			vm_object_unlock(object);
			vm_map_unlock(map);
			continue;
		}
		/*
		 * keep reference on the object until syncing is done
		 */
		vm_object_reference_locked(object);
		vm_object_unlock(object);

		vm_map_unlock(map);

		do_sync_req = vm_object_sync(object,
					     offset,
					     flush_size,
					     sync_flags & VM_SYNC_INVALIDATE,
					     ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
					      (sync_flags & VM_SYNC_ASYNCHRONOUS)),
					     sync_flags & VM_SYNC_SYNCHRONOUS);
		/*
		 * only send a m_o_s if we returned pages or if the entry
		 * is writable (ie dirty pages may have already been sent back)
		 */
		if (!do_sync_req) {
			if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
				/*
				 * clear out the clustering and read-ahead hints
				 */
				vm_object_lock(object);

				object->pages_created = 0;
				object->pages_used = 0;
				object->sequential = 0;
				object->last_alloc = 0;

				vm_object_unlock(object);
			}
			vm_object_deallocate(object);
			continue;
		}
		msync_req_alloc(new_msr);

		vm_object_lock(object);
		offset += object->paging_offset;

		new_msr->offset = offset;
		new_msr->length = flush_size;
		new_msr->object = object;
		new_msr->flag = VM_MSYNC_SYNCHRONIZING;
	re_iterate:

		/*
		 * We can't sync this object if there isn't a pager.  The
		 * pager can disappear anytime we're not holding the object
		 * lock.  So this has to be checked anytime we goto re_iterate.
		 */

		pager = object->pager;

		if (pager == MEMORY_OBJECT_NULL) {
			vm_object_unlock(object);
			vm_object_deallocate(object);
			continue;
		}

		queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
			/*
			 * need to check for overlapping entry, if found, wait
			 * on overlapping msr to be done, then reiterate
			 */
			msr_lock(msr);
			if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
			    ((offset >= msr->offset &&
			      offset < (msr->offset + msr->length)) ||
			     (msr->offset >= offset &&
			      msr->offset < (offset + flush_size))))
			{
				assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
				msr_unlock(msr);
				vm_object_unlock(object);
				thread_block(THREAD_CONTINUE_NULL);
				vm_object_lock(object);
				goto re_iterate;
			}
			msr_unlock(msr);
		}/* queue_iterate */

		queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);

		vm_object_paging_begin(object);
		vm_object_unlock(object);

		queue_enter(&req_q, new_msr, msync_req_t, req_q);

		(void) memory_object_synchronize(
			pager,
			offset,
			flush_size,
			sync_flags & ~VM_SYNC_CONTIGUOUS);

		vm_object_lock(object);
		vm_object_paging_end(object);
		vm_object_unlock(object);
	}/* while */

	/*
	 * wait for memory_object_synchronize_completed messages from pager(s)
	 */

	while (!queue_empty(&req_q)) {
		msr = (msync_req_t)queue_first(&req_q);
		msr_lock(msr);
		while (msr->flag != VM_MSYNC_DONE) {
			assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
			msr_unlock(msr);
			thread_block(THREAD_CONTINUE_NULL);
			msr_lock(msr);
		}/* while */
		msr_unlock(msr);
		queue_remove(&req_q, msr, msync_req_t, req_q);
		vm_object_deallocate(msr->object);
		msync_req_free(msr);
	}/* while */

	/* for proper msync() behaviour */
	if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
		return(KERN_INVALID_ADDRESS);

	return(KERN_SUCCESS);
}/* vm_map_msync */
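/*
 * Illustrative sketch (not part of the original source): one way a kernel
 * caller might drive vm_map_msync() for a synchronous flush that also
 * reports holes, per the sync_flags interpretation documented above.
 * The wrapper name and its use are assumptions for illustration only,
 * so the block is compiled out.
 */
#if 0
static kern_return_t
example_msync_flush(				/* hypothetical helper */
	vm_map_t		map,
	vm_map_address_t	addr,
	vm_map_size_t		len)
{
	/*
	 * VM_SYNC_SYNCHRONOUS: write dirty/precious pages back and wait.
	 * VM_SYNC_CONTIGUOUS: turn a hole in the range into
	 * KERN_INVALID_ADDRESS instead of KERN_SUCCESS.
	 */
	return vm_map_msync(map, addr, len,
			    VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS);
}
#endif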
/*
 *	Routine:	convert_port_entry_to_map
 *	Purpose:
 *		Convert from a port specifying an entry or a task
 *		to a map. Doesn't consume the port ref; produces a map ref,
 *		which may be null.  Unlike convert_port_to_map, the
 *		port may be task or a named entry backed.
 *	Conditions:
 *		Nothing locked.
 */


vm_map_t
convert_port_entry_to_map(
	ipc_port_t	port)
{
	vm_map_t		map;
	vm_named_entry_t	named_entry;
	uint32_t		try_failed_count = 0;

	if (IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
		while (TRUE) {
			ip_lock(port);
			if (ip_active(port) && (ip_kotype(port)
						== IKOT_NAMED_ENTRY)) {
				named_entry =
					(vm_named_entry_t)port->ip_kobject;
				if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
					ip_unlock(port);

					try_failed_count++;
					mutex_pause(try_failed_count);
					continue;
				}
				named_entry->ref_count++;
				lck_mtx_unlock(&(named_entry)->Lock);
				ip_unlock(port);
				if ((named_entry->is_sub_map) &&
				    (named_entry->protection
				     & VM_PROT_WRITE)) {
					map = named_entry->backing.map;
				} else {
					mach_destroy_memory_entry(port);
					return VM_MAP_NULL;
				}
				vm_map_reference_swap(map);
				mach_destroy_memory_entry(port);
				break;
			}
			else
				return VM_MAP_NULL;
		}
	}
	else
		map = convert_port_to_map(port);

	return map;
}
/*
 *	Routine:	convert_port_entry_to_object
 *	Purpose:
 *		Convert from a port specifying a named entry to an
 *		object. Doesn't consume the port ref; produces an object ref,
 *		which may be null.
 *	Conditions:
 *		Nothing locked.
 */


vm_object_t
convert_port_entry_to_object(
	ipc_port_t	port)
{
	vm_object_t		object;
	vm_named_entry_t	named_entry;
	uint32_t		try_failed_count = 0;

	if (IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
		while (TRUE) {
			ip_lock(port);
			if (ip_active(port) && (ip_kotype(port)
						== IKOT_NAMED_ENTRY)) {
				named_entry =
					(vm_named_entry_t)port->ip_kobject;
				if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
					ip_unlock(port);

					try_failed_count++;
					mutex_pause(try_failed_count);
					continue;
				}
				named_entry->ref_count++;
				lck_mtx_unlock(&(named_entry)->Lock);
				ip_unlock(port);
				if ((!named_entry->is_sub_map) &&
				    (!named_entry->is_pager) &&
				    (named_entry->protection
				     & VM_PROT_WRITE)) {
					object = named_entry->backing.object;
				} else {
					mach_destroy_memory_entry(port);
					return (vm_object_t)NULL;
				}
				vm_object_reference(named_entry->backing.object);
				mach_destroy_memory_entry(port);
				break;
			}
			else
				return (vm_object_t)NULL;
		}
	} else
		return (vm_object_t)NULL;

	return object;
}
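/*
 * Illustrative sketch (not part of the original source): both converters
 * above leave the caller's send right on the port intact and hand back a
 * new reference (map or object) that the caller must drop when done.
 * The helper below is hypothetical and compiled out.
 */
#if 0
static void
example_use_entry_port(ipc_port_t port)		/* hypothetical helper */
{
	vm_map_t	map;
	vm_object_t	object;

	map = convert_port_entry_to_map(port);
	if (map != VM_MAP_NULL) {
		/* ... use the map ... */
		vm_map_deallocate(map);		/* drop the map ref we were given */
	}

	object = convert_port_entry_to_object(port);
	if (object != VM_OBJECT_NULL) {
		/* ... use the object ... */
		vm_object_deallocate(object);	/* drop the object ref we were given */
	}
	/* the port ref itself is not consumed by either converter */
}
#endif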
/*
 * Export routines to other components for the things we access locally through
 * macros.
 */
#undef current_map
vm_map_t
current_map(void)
{
	return (current_map_fast());
}

/*
 *	vm_map_reference:
 *
 *	Most code internal to the osfmk will go through a
 *	macro defining this.  This is always here for the
 *	use of other kernel components.
 */
#undef vm_map_reference
void
vm_map_reference(
	register vm_map_t	map)
{
	if (map == VM_MAP_NULL)
		return;

	lck_mtx_lock(&map->s_lock);
#if	TASK_SWAPPER
	assert(map->res_count > 0);
	assert(map->ref_count >= map->res_count);
	map->res_count++;
#endif
	map->ref_count++;
	lck_mtx_unlock(&map->s_lock);
}
/*
 *	vm_map_deallocate:
 *
 *	Removes a reference from the specified map,
 *	destroying it if no references remain.
 *	The map should not be locked.
 */
void
vm_map_deallocate(
	register vm_map_t	map)
{
	unsigned int		ref;

	if (map == VM_MAP_NULL)
		return;

	lck_mtx_lock(&map->s_lock);
	ref = --map->ref_count;
	if (ref > 0) {
		vm_map_res_deallocate(map);
		lck_mtx_unlock(&map->s_lock);
		return;
	}
	assert(map->ref_count == 0);
	lck_mtx_unlock(&map->s_lock);

#if	TASK_SWAPPER
	/*
	 * The map residence count isn't decremented here because
	 * the vm_map_delete below will traverse the entire map,
	 * deleting entries, and the residence counts on objects
	 * and sharing maps will go away then.
	 */
#endif

	vm_map_destroy(map, VM_MAP_NO_FLAGS);
}
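/*
 * Illustrative sketch (not part of the original source): reference counting
 * discipline for external components using the two routines above. The
 * helper is hypothetical and compiled out.
 */
#if 0
static void
example_hold_map(vm_map_t map)			/* hypothetical helper */
{
	vm_map_reference(map);		/* take a ref so the map can't go away */
	/* ... operate on the map ... */
	vm_map_deallocate(map);		/* drop the ref; destroys the map if it was the last one */
}
#endif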
void
vm_map_disable_NX(vm_map_t map)
{
	if (map == NULL)
		return;
	if (map->pmap == NULL)
		return;

	pmap_disable_NX(map->pmap);
}
/* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
 * more descriptive.
 */
void
vm_map_set_32bit(vm_map_t map)
{
	map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
}


void
vm_map_set_64bit(vm_map_t map)
{
	map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
}

vm_map_offset_t
vm_compute_max_offset(unsigned is64)
{
	return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
}

boolean_t
vm_map_is_64bit(
		vm_map_t map)
{
	return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
}
boolean_t
vm_map_has_4GB_pagezero(
		vm_map_t map)
{
	/*
	 * We should lock the VM map (for read) here but we can get away
	 * with it for now because there can't really be any race condition:
	 * the VM map's min_offset is changed only when the VM map is created
	 * and when the zero page is established (when the binary gets loaded),
	 * and this routine gets called only when the task terminates and the
	 * VM map is being torn down, and when a new map is created via
	 * load_machfile()/execve().
	 */
	return (map->min_offset >= 0x100000000ULL);
}

void
vm_map_set_4GB_pagezero(vm_map_t map)
{
#ifdef	__i386__
	pmap_set_4GB_pagezero(map->pmap);
#else
#pragma unused(map)
#endif
}

void
vm_map_clear_4GB_pagezero(vm_map_t map)
{
#ifdef	__i386__
	pmap_clear_4GB_pagezero(map->pmap);
#else
#pragma unused(map)
#endif
}
/*
 * Raise a VM map's minimum offset.
 * To strictly enforce "page zero" reservation.
 */
kern_return_t
vm_map_raise_min_offset(
	vm_map_t	map,
	vm_map_offset_t	new_min_offset)
{
	vm_map_entry_t	first_entry;

	new_min_offset = vm_map_round_page(new_min_offset);

	vm_map_lock(map);

	if (new_min_offset < map->min_offset) {
		/*
		 * Can't move min_offset backwards, as that would expose
		 * a part of the address space that was previously, and for
		 * possibly good reasons, inaccessible.
		 */
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}

	first_entry = vm_map_first_entry(map);
	if (first_entry != vm_map_to_entry(map) &&
	    first_entry->vme_start < new_min_offset) {
		/*
		 * Some memory was already allocated below the new
		 * minimum offset.  It's too late to change it now...
		 */
		vm_map_unlock(map);
		return KERN_NO_SPACE;
	}

	map->min_offset = new_min_offset;

	vm_map_unlock(map);

	return KERN_SUCCESS;
}
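/*
 * Illustrative sketch (not part of the original source): how a loader-style
 * caller might reserve a one-page "page zero" by raising the map's minimum
 * offset and checking the two documented failure modes. Hypothetical
 * helper, compiled out.
 */
#if 0
static kern_return_t
example_reserve_pagezero(vm_map_t map)		/* hypothetical helper */
{
	kern_return_t kr;

	kr = vm_map_raise_min_offset(map, (vm_map_offset_t)PAGE_SIZE);
	if (kr == KERN_NO_SPACE) {
		/* something is already mapped below the new minimum */
	} else if (kr == KERN_INVALID_ADDRESS) {
		/* attempted to lower the minimum offset; not allowed */
	}
	return kr;
}
#endif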
/*
 * Set the limit on the maximum amount of user wired memory allowed for this map.
 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
 * the kernel.  The limits are checked in the mach VM side, so we keep a copy so we
 * don't have to reach over to the BSD data structures.
 */

void
vm_map_set_user_wire_limit(vm_map_t	map,
			   vm_size_t	limit)
{
	map->user_wire_limit = limit;
}
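/*
 * Illustrative sketch (not part of the original source): the comment above
 * says this mirrors the BSD MEMLOCK rlimit, so a BSD-side caller would copy
 * that value into the map roughly as below. The helper and variable names
 * are assumptions; compiled out.
 */
#if 0
static void
example_sync_memlock_limit(vm_map_t map, uint64_t memlock_limit)	/* hypothetical */
{
	/* memlock_limit would come from the process's RLIMIT_MEMLOCK value */
	vm_map_set_user_wire_limit(map, (vm_size_t)memlock_limit);
}
#endif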
void vm_map_switch_protect(vm_map_t	map,
			   boolean_t	val)
{
	vm_map_lock(map);
	map->switch_protect = val;
	vm_map_unlock(map);
}
/* Add (generate) code signature for memory range */
#if CONFIG_DYNAMIC_CODE_SIGNING
kern_return_t vm_map_sign(vm_map_t map,
		 vm_map_offset_t start,
		 vm_map_offset_t end)
{
	vm_map_entry_t entry;
	vm_page_t m;
	vm_object_t object;

	/*
	 * Vet all the input parameters and current type and state of the
	 * underlying object.  Return with an error if anything is amiss.
	 */
	if (map == VM_MAP_NULL)
		return(KERN_INVALID_ARGUMENT);

	vm_map_lock_read(map);

	if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
		/*
		 * Must pass a valid non-submap address.
		 */
		vm_map_unlock_read(map);
		return(KERN_INVALID_ADDRESS);
	}

	if ((entry->vme_start > start) || (entry->vme_end < end)) {
		/*
		 * Map entry doesn't cover the requested range. Not handling
		 * this situation currently.
		 */
		vm_map_unlock_read(map);
		return(KERN_INVALID_ARGUMENT);
	}

	object = entry->object.vm_object;
	if (object == VM_OBJECT_NULL) {
		/*
		 * Object must already be present or we can't sign.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ARGUMENT;
	}

	vm_object_lock(object);
	vm_map_unlock_read(map);

	while (start < end) {
		uint32_t refmod;

		m = vm_page_lookup(object, start - entry->vme_start + entry->offset);
		if (m == VM_PAGE_NULL) {
			/* should we try to fault a page here? we can probably
			 * demand it exists and is locked for this request */
			vm_object_unlock(object);
			return KERN_FAILURE;
		}
		/* deal with special page status */
		if (m->busy ||
		    (m->unusual && (m->error || m->restart || m->private || m->absent))) {
			vm_object_unlock(object);
			return KERN_FAILURE;
		}

		/* Page is OK... now "validate" it */
		/* This is the place where we'll call out to create a code
		 * directory, later */
		m->cs_validated = TRUE;

		/* The page is now "clean" for codesigning purposes. That means
		 * we don't consider it as modified (wpmapped) anymore. But
		 * we'll disconnect the page so we note any future modification
		 * attempts. */
		m->wpmapped = FALSE;
		refmod = pmap_disconnect(m->phys_page);

		/* Pull the dirty status from the pmap, since we cleared the
		 * wpmapped bit */
		if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
			m->dirty = TRUE;
		}

		/* On to the next page */
		start += PAGE_SIZE;
	}
	vm_object_unlock(object);

	return KERN_SUCCESS;
}
#endif
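/*
 * Illustrative sketch (not part of the original source): vm_map_sign() is
 * only built under CONFIG_DYNAMIC_CODE_SIGNING and expects [start, end) to
 * fall within a single, already-populated map entry. Hypothetical helper,
 * compiled out.
 */
#if 0
static kern_return_t
example_sign_region(vm_map_t map, vm_map_offset_t start, vm_map_size_t len)
{
	/* align both bounds to page boundaries before signing */
	return vm_map_sign(map,
			   vm_map_trunc_page(start),
			   vm_map_round_page(start + len));
}
#endif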