[apple/xnu.git] xnu-2050.7.9 / osfmk / vm / vm_map.c
1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66 #include <task_swapper.h>
67 #include <mach_assert.h>
68 #include <libkern/OSAtomic.h>
69
70 #include <mach/kern_return.h>
71 #include <mach/port.h>
72 #include <mach/vm_attributes.h>
73 #include <mach/vm_param.h>
74 #include <mach/vm_behavior.h>
75 #include <mach/vm_statistics.h>
76 #include <mach/memory_object.h>
77 #include <mach/mach_vm.h>
78 #include <machine/cpu_capabilities.h>
79 #include <mach/sdt.h>
80
81 #include <kern/assert.h>
82 #include <kern/counters.h>
83 #include <kern/kalloc.h>
84 #include <kern/zalloc.h>
85
86 #include <vm/cpm.h>
87 #include <vm/vm_init.h>
88 #include <vm/vm_fault.h>
89 #include <vm/vm_map.h>
90 #include <vm/vm_object.h>
91 #include <vm/vm_page.h>
92 #include <vm/vm_pageout.h>
93 #include <vm/vm_kern.h>
94 #include <ipc/ipc_port.h>
95 #include <kern/sched_prim.h>
96 #include <kern/misc_protos.h>
97 #include <kern/xpr.h>
98
99 #include <mach/vm_map_server.h>
100 #include <mach/mach_host_server.h>
101 #include <vm/vm_protos.h>
102 #include <vm/vm_purgeable_internal.h>
103
104 #include <vm/vm_protos.h>
105 #include <vm/vm_shared_region.h>
106 #include <vm/vm_map_store.h>
107
108 extern u_int32_t random(void); /* from <libkern/libkern.h> */
109 /* Internal prototypes
110 */
111
112 static void vm_map_simplify_range(
113 vm_map_t map,
114 vm_map_offset_t start,
115 vm_map_offset_t end); /* forward */
116
117 static boolean_t vm_map_range_check(
118 vm_map_t map,
119 vm_map_offset_t start,
120 vm_map_offset_t end,
121 vm_map_entry_t *entry);
122
123 static vm_map_entry_t _vm_map_entry_create(
124 struct vm_map_header *map_header, boolean_t map_locked);
125
126 static void _vm_map_entry_dispose(
127 struct vm_map_header *map_header,
128 vm_map_entry_t entry);
129
130 static void vm_map_pmap_enter(
131 vm_map_t map,
132 vm_map_offset_t addr,
133 vm_map_offset_t end_addr,
134 vm_object_t object,
135 vm_object_offset_t offset,
136 vm_prot_t protection);
137
138 static void _vm_map_clip_end(
139 struct vm_map_header *map_header,
140 vm_map_entry_t entry,
141 vm_map_offset_t end);
142
143 static void _vm_map_clip_start(
144 struct vm_map_header *map_header,
145 vm_map_entry_t entry,
146 vm_map_offset_t start);
147
148 static void vm_map_entry_delete(
149 vm_map_t map,
150 vm_map_entry_t entry);
151
152 static kern_return_t vm_map_delete(
153 vm_map_t map,
154 vm_map_offset_t start,
155 vm_map_offset_t end,
156 int flags,
157 vm_map_t zap_map);
158
159 static kern_return_t vm_map_copy_overwrite_unaligned(
160 vm_map_t dst_map,
161 vm_map_entry_t entry,
162 vm_map_copy_t copy,
163 vm_map_address_t start);
164
165 static kern_return_t vm_map_copy_overwrite_aligned(
166 vm_map_t dst_map,
167 vm_map_entry_t tmp_entry,
168 vm_map_copy_t copy,
169 vm_map_offset_t start,
170 pmap_t pmap);
171
172 static kern_return_t vm_map_copyin_kernel_buffer(
173 vm_map_t src_map,
174 vm_map_address_t src_addr,
175 vm_map_size_t len,
176 boolean_t src_destroy,
177 vm_map_copy_t *copy_result); /* OUT */
178
179 static kern_return_t vm_map_copyout_kernel_buffer(
180 vm_map_t map,
181 vm_map_address_t *addr, /* IN/OUT */
182 vm_map_copy_t copy,
183 boolean_t overwrite);
184
185 static void vm_map_fork_share(
186 vm_map_t old_map,
187 vm_map_entry_t old_entry,
188 vm_map_t new_map);
189
190 static boolean_t vm_map_fork_copy(
191 vm_map_t old_map,
192 vm_map_entry_t *old_entry_p,
193 vm_map_t new_map);
194
195 void vm_map_region_top_walk(
196 vm_map_entry_t entry,
197 vm_region_top_info_t top);
198
199 void vm_map_region_walk(
200 vm_map_t map,
201 vm_map_offset_t va,
202 vm_map_entry_t entry,
203 vm_object_offset_t offset,
204 vm_object_size_t range,
205 vm_region_extended_info_t extended,
206 boolean_t look_for_pages);
207
208 static kern_return_t vm_map_wire_nested(
209 vm_map_t map,
210 vm_map_offset_t start,
211 vm_map_offset_t end,
212 vm_prot_t access_type,
213 boolean_t user_wire,
214 pmap_t map_pmap,
215 vm_map_offset_t pmap_addr);
216
217 static kern_return_t vm_map_unwire_nested(
218 vm_map_t map,
219 vm_map_offset_t start,
220 vm_map_offset_t end,
221 boolean_t user_wire,
222 pmap_t map_pmap,
223 vm_map_offset_t pmap_addr);
224
225 static kern_return_t vm_map_overwrite_submap_recurse(
226 vm_map_t dst_map,
227 vm_map_offset_t dst_addr,
228 vm_map_size_t dst_size);
229
230 static kern_return_t vm_map_copy_overwrite_nested(
231 vm_map_t dst_map,
232 vm_map_offset_t dst_addr,
233 vm_map_copy_t copy,
234 boolean_t interruptible,
235 pmap_t pmap,
236 boolean_t discard_on_success);
237
238 static kern_return_t vm_map_remap_extract(
239 vm_map_t map,
240 vm_map_offset_t addr,
241 vm_map_size_t size,
242 boolean_t copy,
243 struct vm_map_header *map_header,
244 vm_prot_t *cur_protection,
245 vm_prot_t *max_protection,
246 vm_inherit_t inheritance,
247 boolean_t pageable);
248
249 static kern_return_t vm_map_remap_range_allocate(
250 vm_map_t map,
251 vm_map_address_t *address,
252 vm_map_size_t size,
253 vm_map_offset_t mask,
254 int flags,
255 vm_map_entry_t *map_entry);
256
257 static void vm_map_region_look_for_page(
258 vm_map_t map,
259 vm_map_offset_t va,
260 vm_object_t object,
261 vm_object_offset_t offset,
262 int max_refcnt,
263 int depth,
264 vm_region_extended_info_t extended);
265
266 static int vm_map_region_count_obj_refs(
267 vm_map_entry_t entry,
268 vm_object_t object);
269
270
271 static kern_return_t vm_map_willneed(
272 vm_map_t map,
273 vm_map_offset_t start,
274 vm_map_offset_t end);
275
276 static kern_return_t vm_map_reuse_pages(
277 vm_map_t map,
278 vm_map_offset_t start,
279 vm_map_offset_t end);
280
281 static kern_return_t vm_map_reusable_pages(
282 vm_map_t map,
283 vm_map_offset_t start,
284 vm_map_offset_t end);
285
286 static kern_return_t vm_map_can_reuse(
287 vm_map_t map,
288 vm_map_offset_t start,
289 vm_map_offset_t end);
290
291
292 /*
293 * Macros to copy a vm_map_entry. We must be careful to correctly
294 * manage the wired page count. vm_map_entry_copy() creates a new
295 * map entry for the same memory - the wired count in the new entry
296 * must be set to zero. vm_map_entry_copy_full() creates a new
297 * entry that is identical to the old entry. This preserves the
298 * wire count; it's used for map splitting and zone changing in
299 * vm_map_copyout.
300 */
301
302 #define vm_map_entry_copy(NEW,OLD) \
303 MACRO_BEGIN \
304 boolean_t _vmec_reserved = (NEW)->from_reserved_zone; \
305 *(NEW) = *(OLD); \
306 (NEW)->is_shared = FALSE; \
307 (NEW)->needs_wakeup = FALSE; \
308 (NEW)->in_transition = FALSE; \
309 (NEW)->wired_count = 0; \
310 (NEW)->user_wired_count = 0; \
311 (NEW)->permanent = FALSE; \
312 (NEW)->used_for_jit = FALSE; \
313 (NEW)->from_reserved_zone = _vmec_reserved; \
314 MACRO_END
315
316 #define vm_map_entry_copy_full(NEW,OLD) \
317 MACRO_BEGIN \
318 boolean_t _vmecf_reserved = (NEW)->from_reserved_zone; \
319 (*(NEW) = *(OLD)); \
320 (NEW)->from_reserved_zone = _vmecf_reserved; \
321 MACRO_END
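
/*
 * Illustrative sketch (hypothetical helper, not part of the original
 * source): the distinction above in practice.  vm_map_entry_copy() is
 * used when an entry is cloned into a new mapping, so the clone must not
 * claim wired pages it never wired; vm_map_entry_copy_full() is used when
 * an entry is duplicated in place (e.g. while splitting), so the clone
 * stays bit-for-bit identical, wire counts included.
 */
static __unused void
example_entry_copy_semantics(
	vm_map_entry_t	dst,		/* an already-created entry */
	vm_map_entry_t	src)
{
	/* clone for a new mapping: wired counts are reset */
	vm_map_entry_copy(dst, src);
	assert(dst->wired_count == 0 && dst->user_wired_count == 0);

	/* identical clone: wired counts carry over unchanged */
	vm_map_entry_copy_full(dst, src);
	assert(dst->wired_count == src->wired_count);
}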
322
323 /*
324 * Decide if we want to allow processes to execute from their data or stack areas.
325 * override_nx() returns true if we do. Data/stack execution can be enabled independently
326 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
327 * or allow_stack_exec to enable data execution for that type of data area for that particular
328 * ABI (or both by or'ing the flags together). These are initialized in the architecture
329 * specific pmap files since the default behavior varies according to architecture. The
330 * main reason it varies is because of the need to provide binary compatibility with old
331 * applications that were written before these restrictions came into being. In the old
332 * days, an app could execute anything it could read, but this has slowly been tightened
333 * up over time. The default behavior is:
334 *
335 * 32-bit PPC apps may execute from both stack and data areas
336 * 32-bit Intel apps may execute from data areas but not stack
337 * 64-bit PPC/Intel apps may not execute from either data or stack
338 *
339 * An application on any architecture may override these defaults by explicitly
340 * adding PROT_EXEC permission to the page in question with the mprotect(2)
341 * system call. This code here just determines what happens when an app tries to
342 * execute from a page that lacks execute permission.
343 *
344 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
345 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
346 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
347 * execution from data areas for a particular binary even if the arch normally permits it. As
348 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
349 * to support some complicated use cases, notably browsers with out-of-process plugins that
350 * are not all NX-safe.
351 */
352
353 extern int allow_data_exec, allow_stack_exec;
354
355 int
356 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
357 {
358 int current_abi;
359
360 /*
361 * Determine if the app is running in 32 or 64 bit mode.
362 */
363
364 if (vm_map_is_64bit(map))
365 current_abi = VM_ABI_64;
366 else
367 current_abi = VM_ABI_32;
368
369 /*
370 * Determine if we should allow the execution based on whether it's a
371 * stack or data area and the current architecture.
372 */
373
374 if (user_tag == VM_MEMORY_STACK)
375 return allow_stack_exec & current_abi;
376
377 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
378 }
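
/*
 * Illustrative sketch (hypothetical helper): how a fault path typically
 * consumes override_nx().  The entry's protection lacks execute, but if
 * the policy above allows data/stack execution for this ABI, execute
 * permission is added back before the page tables are filled.
 */
static __unused vm_prot_t
example_effective_protection(
	vm_map_t	map,
	vm_map_entry_t	entry)
{
	vm_prot_t	prot;

	prot = entry->protection;
	if (override_nx(map, entry->alias) && prot)
		prot |= VM_PROT_EXECUTE;
	return prot;
}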
379
380
381 /*
382 * Virtual memory maps provide for the mapping, protection,
383 * and sharing of virtual memory objects. In addition,
384 * this module provides for an efficient virtual copy of
385 * memory from one map to another.
386 *
387 * Synchronization is required prior to most operations.
388 *
389 * Maps consist of an ordered doubly-linked list of simple
390 * entries; a single hint is used to speed up lookups.
391 *
392 * Sharing maps have been deleted from this version of Mach.
393 * All shared objects are now mapped directly into the respective
394 * maps. This requires a change in the copy on write strategy;
395 * the asymmetric (delayed) strategy is used for shared temporary
396 * objects instead of the symmetric (shadow) strategy. All maps
397 * are now "top level" maps (either task map, kernel map or submap
398 * of the kernel map).
399 *
400 * Since portions of maps are specified by start/end addresses,
401 * which may not align with existing map entries, all
402 * routines merely "clip" entries to these start/end values.
403 * [That is, an entry is split into two, bordering at a
404 * start or end value.] Note that these clippings may not
405 * always be necessary (as the two resulting entries are then
406 * not changed); however, the clipping is done for convenience.
407 * No attempt is currently made to "glue back together" two
408 * abutting entries.
409 *
410 * The symmetric (shadow) copy strategy implements virtual copy
411 * by copying VM object references from one map to
412 * another, and then marking both regions as copy-on-write.
413 * It is important to note that only one writeable reference
414 * to a VM object region exists in any map when this strategy
415 * is used -- this means that shadow object creation can be
416 * delayed until a write operation occurs. The asymmetric (delayed)
417 * strategy allows multiple maps to have writeable references to
418 * the same region of a vm object, and hence cannot delay creating
419 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
420 * Copying of permanent objects is completely different; see
421 * vm_object_copy_strategically() in vm_object.c.
422 */
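
/*
 * A concrete example of the clipping described above (illustrative):
 * if an entry covers [0x1000, 0x5000) and an operation targets
 * [0x2000, 0x4000), the entry is clipped at 0x2000 into
 * [0x1000, 0x2000) and [0x2000, 0x5000), and the second piece is then
 * clipped at 0x4000 into [0x2000, 0x4000) and [0x4000, 0x5000).  The
 * operation now applies to an exactly matching middle entry; the
 * neighbouring pieces are untouched and are not glued back together
 * afterwards.
 */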
423
424 static zone_t vm_map_zone; /* zone for vm_map structures */
425 static zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
426 static zone_t vm_map_entry_reserved_zone; /* zone with reserve for non-blocking
427 * allocations */
428 static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
429
430
431 /*
432 * Placeholder object for submap operations. This object is dropped
433 * into the range by a call to vm_map_find, and removed when
434 * vm_map_submap creates the submap.
435 */
436
437 vm_object_t vm_submap_object;
438
439 static void *map_data;
440 static vm_size_t map_data_size;
441 static void *kentry_data;
442 static vm_size_t kentry_data_size;
443
444 #if CONFIG_EMBEDDED
445 #define NO_COALESCE_LIMIT 0
446 #else
447 #define NO_COALESCE_LIMIT ((1024 * 128) - 1)
448 #endif
449
450 /* Skip acquiring locks if we're in the midst of a kernel core dump */
451 unsigned int not_in_kdp = 1;
452
453 unsigned int vm_map_set_cache_attr_count = 0;
454
455 kern_return_t
456 vm_map_set_cache_attr(
457 vm_map_t map,
458 vm_map_offset_t va)
459 {
460 vm_map_entry_t map_entry;
461 vm_object_t object;
462 kern_return_t kr = KERN_SUCCESS;
463
464 vm_map_lock_read(map);
465
466 if (!vm_map_lookup_entry(map, va, &map_entry) ||
467 map_entry->is_sub_map) {
468 /*
469 * that memory is not properly mapped
470 */
471 kr = KERN_INVALID_ARGUMENT;
472 goto done;
473 }
474 object = map_entry->object.vm_object;
475
476 if (object == VM_OBJECT_NULL) {
477 /*
478 * there should be a VM object here at this point
479 */
480 kr = KERN_INVALID_ARGUMENT;
481 goto done;
482 }
483 vm_object_lock(object);
484 object->set_cache_attr = TRUE;
485 vm_object_unlock(object);
486
487 vm_map_set_cache_attr_count++;
488 done:
489 vm_map_unlock_read(map);
490
491 return kr;
492 }
493
494
495 #if CONFIG_CODE_DECRYPTION
496 /*
497 * vm_map_apple_protected:
498 * This remaps the requested part of the object with an object backed by
499 * the decrypting pager.
500 * crypt_info contains entry points and session data for the crypt module.
501 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
502 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
503 */
504 kern_return_t
505 vm_map_apple_protected(
506 vm_map_t map,
507 vm_map_offset_t start,
508 vm_map_offset_t end,
509 struct pager_crypt_info *crypt_info)
510 {
511 boolean_t map_locked;
512 kern_return_t kr;
513 vm_map_entry_t map_entry;
514 memory_object_t protected_mem_obj;
515 vm_object_t protected_object;
516 vm_map_offset_t map_addr;
517
518 vm_map_lock_read(map);
519 map_locked = TRUE;
520
521 /* lookup the protected VM object */
522 if (!vm_map_lookup_entry(map,
523 start,
524 &map_entry) ||
525 map_entry->vme_end < end ||
526 map_entry->is_sub_map) {
527 /* that memory is not properly mapped */
528 kr = KERN_INVALID_ARGUMENT;
529 goto done;
530 }
531 protected_object = map_entry->object.vm_object;
532 if (protected_object == VM_OBJECT_NULL) {
533 /* there should be a VM object here at this point */
534 kr = KERN_INVALID_ARGUMENT;
535 goto done;
536 }
537
538 /* make sure protected object stays alive while map is unlocked */
539 vm_object_reference(protected_object);
540
541 vm_map_unlock_read(map);
542 map_locked = FALSE;
543
544 /*
545 * Lookup (and create if necessary) the protected memory object
546 * matching that VM object.
547 * If successful, this also grabs a reference on the memory object,
548 * to guarantee that it doesn't go away before we get a chance to map
549 * it.
550 */
551 protected_mem_obj = apple_protect_pager_setup(protected_object, crypt_info);
552
553 /* release extra ref on protected object */
554 vm_object_deallocate(protected_object);
555
556 if (protected_mem_obj == NULL) {
557 kr = KERN_FAILURE;
558 goto done;
559 }
560
561 /* map this memory object in place of the current one */
562 map_addr = start;
563 kr = vm_map_enter_mem_object(map,
564 &map_addr,
565 end - start,
566 (mach_vm_offset_t) 0,
567 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
568 (ipc_port_t) protected_mem_obj,
569 (map_entry->offset +
570 (start - map_entry->vme_start)),
571 TRUE,
572 map_entry->protection,
573 map_entry->max_protection,
574 map_entry->inheritance);
575 assert(map_addr == start);
576 /*
577 * Release the reference obtained by apple_protect_pager_setup().
578 * The mapping (if it succeeded) is now holding a reference on the
579 * memory object.
580 */
581 memory_object_deallocate(protected_mem_obj);
582
583 done:
584 if (map_locked) {
585 vm_map_unlock_read(map);
586 }
587 return kr;
588 }
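
/*
 * Illustrative sketch (hypothetical caller): handing an encrypted text
 * segment to vm_map_apple_protected().  The crypt_info block itself is
 * copied by the call, but the state its function pointers reference must
 * stay valid until crypt_info->crypt_end() is invoked.
 */
static __unused kern_return_t
example_protect_text_segment(
	vm_map_t			map,
	vm_map_offset_t			seg_start,
	vm_map_size_t			seg_size,
	struct pager_crypt_info		*crypt_info)	/* set up by the crypt module */
{
	return vm_map_apple_protected(map,
				      seg_start,
				      seg_start + seg_size,
				      crypt_info);
}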
589 #endif /* CONFIG_CODE_DECRYPTION */
590
591
592 lck_grp_t vm_map_lck_grp;
593 lck_grp_attr_t vm_map_lck_grp_attr;
594 lck_attr_t vm_map_lck_attr;
595
596
597 /*
598 * vm_map_init:
599 *
600 * Initialize the vm_map module. Must be called before
601 * any other vm_map routines.
602 *
603 * Map and entry structures are allocated from zones -- we must
604 * initialize those zones.
605 *
606 * There are three zones of interest:
607 *
608 * vm_map_zone: used to allocate maps.
609 * vm_map_entry_zone: used to allocate map entries.
610 * vm_map_entry_reserved_zone: fallback zone for kernel map entries
611 *
612 * The kernel allocates map entries from a special zone that is initially
613 * "crammed" with memory. It would be difficult (perhaps impossible) for
614 * the kernel to allocate more memory to an entry zone when it became
615 * empty since the very act of allocating memory implies the creation
616 * of a new entry.
617 */
618 void
619 vm_map_init(
620 void)
621 {
622 vm_size_t entry_zone_alloc_size;
623 const char *mez_name = "VM map entries";
624
625 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
626 PAGE_SIZE, "maps");
627 zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
628 #if defined(__LP64__)
629 entry_zone_alloc_size = PAGE_SIZE * 5;
630 #else
631 entry_zone_alloc_size = PAGE_SIZE * 6;
632 #endif
633 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
634 1024*1024, entry_zone_alloc_size,
635 mez_name);
636 zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
637 zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
638 zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);
639
640 vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
641 kentry_data_size * 64, kentry_data_size,
642 "Reserved VM map entries");
643 zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);
644
645 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
646 16*1024, PAGE_SIZE, "VM map copies");
647 zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
648
649 /*
650 * Cram the map and kentry zones with initial data.
651 * Set reserved_zone non-collectible to aid zone_gc().
652 */
653 zone_change(vm_map_zone, Z_COLLECT, FALSE);
654
655 zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
656 zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
657 zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
658 zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
659 zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
660 zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
661 zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);
662
663 zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
664 zcram(vm_map_entry_reserved_zone, (vm_offset_t)kentry_data, kentry_data_size);
665
666 lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
667 lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
668 lck_attr_setdefault(&vm_map_lck_attr);
669
670 #if CONFIG_FREEZE
671 default_freezer_init();
672 #endif /* CONFIG_FREEZE */
673 }
674
675 void
676 vm_map_steal_memory(
677 void)
678 {
679 uint32_t kentry_initial_pages;
680
681 map_data_size = round_page(10 * sizeof(struct _vm_map));
682 map_data = pmap_steal_memory(map_data_size);
683
684 /*
685 * kentry_initial_pages corresponds to the number of kernel map entries
686 * required during bootstrap until the asynchronous replenishment
687 * scheme is activated and/or entries are available from the general
688 * map entry pool.
689 */
690 #if defined(__LP64__)
691 kentry_initial_pages = 10;
692 #else
693 kentry_initial_pages = 6;
694 #endif
695
696 #if CONFIG_GZALLOC
697 /* If using the guard allocator, reserve more memory for the kernel
698 * reserved map entry pool.
699 */
700 if (gzalloc_enabled())
701 kentry_initial_pages *= 1024;
702 #endif
703
704 kentry_data_size = kentry_initial_pages * PAGE_SIZE;
705 kentry_data = pmap_steal_memory(kentry_data_size);
706 }
707
708 void vm_kernel_reserved_entry_init(void) {
709 zone_prio_refill_configure(vm_map_entry_reserved_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_entry));
710 }
711
712 /*
713 * vm_map_create:
714 *
715 * Creates and returns a new empty VM map with
716 * the given physical map structure, and having
717 * the given lower and upper address bounds.
718 */
719 vm_map_t
720 vm_map_create(
721 pmap_t pmap,
722 vm_map_offset_t min,
723 vm_map_offset_t max,
724 boolean_t pageable)
725 {
726 static int color_seed = 0;
727 register vm_map_t result;
728
729 result = (vm_map_t) zalloc(vm_map_zone);
730 if (result == VM_MAP_NULL)
731 panic("vm_map_create");
732
733 vm_map_first_entry(result) = vm_map_to_entry(result);
734 vm_map_last_entry(result) = vm_map_to_entry(result);
735 result->hdr.nentries = 0;
736 result->hdr.entries_pageable = pageable;
737
738 vm_map_store_init( &(result->hdr) );
739
740 result->size = 0;
741 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
742 result->user_wire_size = 0;
743 result->ref_count = 1;
744 #if TASK_SWAPPER
745 result->res_count = 1;
746 result->sw_state = MAP_SW_IN;
747 #endif /* TASK_SWAPPER */
748 result->pmap = pmap;
749 result->min_offset = min;
750 result->max_offset = max;
751 result->wiring_required = FALSE;
752 result->no_zero_fill = FALSE;
753 result->mapped_in_other_pmaps = FALSE;
754 result->wait_for_space = FALSE;
755 result->switch_protect = FALSE;
756 result->disable_vmentry_reuse = FALSE;
757 result->map_disallow_data_exec = FALSE;
758 result->highest_entry_end = 0;
759 result->first_free = vm_map_to_entry(result);
760 result->hint = vm_map_to_entry(result);
761 result->color_rr = (color_seed++) & vm_color_mask;
762 result->jit_entry_exists = FALSE;
763 #if CONFIG_FREEZE
764 result->default_freezer_handle = NULL;
765 #endif
766 vm_map_lock_init(result);
767 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
768
769 return(result);
770 }
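
/*
 * Illustrative sketch (hypothetical helper): a pmap-less map created with
 * vm_map_create(), in the style of the "zap" maps used later in this file
 * to park displaced entries while a range is being replaced.
 */
static __unused vm_map_t
example_create_zap_map(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_size_t	size)
{
	return vm_map_create(PMAP_NULL,
			     start,
			     start + size,
			     map->hdr.entries_pageable);
}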
771
772 /*
773 * vm_map_entry_create: [ internal use only ]
774 *
775 * Allocates a VM map entry for insertion in the
776 * given map (or map copy). No fields are filled.
777 */
778 #define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked)
779
780 #define vm_map_copy_entry_create(copy, map_locked) \
781 _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
782 unsigned reserved_zalloc_count, nonreserved_zalloc_count;
783
784 static vm_map_entry_t
785 _vm_map_entry_create(
786 struct vm_map_header *map_header, boolean_t __unused map_locked)
787 {
788 zone_t zone;
789 vm_map_entry_t entry;
790
791 zone = vm_map_entry_zone;
792
793 assert(map_header->entries_pageable ? !map_locked : TRUE);
794
795 if (map_header->entries_pageable) {
796 entry = (vm_map_entry_t) zalloc(zone);
797 }
798 else {
799 entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);
800
801 if (entry == VM_MAP_ENTRY_NULL) {
802 zone = vm_map_entry_reserved_zone;
803 entry = (vm_map_entry_t) zalloc(zone);
804 OSAddAtomic(1, &reserved_zalloc_count);
805 } else
806 OSAddAtomic(1, &nonreserved_zalloc_count);
807 }
808
809 if (entry == VM_MAP_ENTRY_NULL)
810 panic("vm_map_entry_create");
811 entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
812
813 vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
814 #if MAP_ENTRY_CREATION_DEBUG
815 fastbacktrace(&entry->vme_bt[0], (sizeof(entry->vme_bt)/sizeof(uintptr_t)));
816 #endif
817 return(entry);
818 }
819
820 /*
821 * vm_map_entry_dispose: [ internal use only ]
822 *
823 * Inverse of vm_map_entry_create.
824 *
825 * write map lock held so no need to
826 * do anything special to ensure correctness
827 * of the stores
828 */
829 #define vm_map_entry_dispose(map, entry) \
830 _vm_map_entry_dispose(&(map)->hdr, (entry))
831
832 #define vm_map_copy_entry_dispose(copy, entry) \
833 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
834
835 static void
836 _vm_map_entry_dispose(
837 register struct vm_map_header *map_header,
838 register vm_map_entry_t entry)
839 {
840 register zone_t zone;
841
842 if (map_header->entries_pageable || !(entry->from_reserved_zone))
843 zone = vm_map_entry_zone;
844 else
845 zone = vm_map_entry_reserved_zone;
846
847 if (!map_header->entries_pageable) {
848 if (zone == vm_map_entry_zone)
849 OSAddAtomic(-1, &nonreserved_zalloc_count);
850 else
851 OSAddAtomic(-1, &reserved_zalloc_count);
852 }
853
854 zfree(zone, entry);
855 }
856
857 #if MACH_ASSERT
858 static boolean_t first_free_check = FALSE;
859 boolean_t
860 first_free_is_valid(
861 vm_map_t map)
862 {
863 if (!first_free_check)
864 return TRUE;
865
866 return( first_free_is_valid_store( map ));
867 }
868 #endif /* MACH_ASSERT */
869
870
871 #define vm_map_copy_entry_link(copy, after_where, entry) \
872 _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
873
874 #define vm_map_copy_entry_unlink(copy, entry) \
875 _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
876
877 #if MACH_ASSERT && TASK_SWAPPER
878 /*
879 * vm_map_res_reference:
880 *
881 * Adds another valid residence count to the given map.
882 *
883 * Map is locked so this function can be called from
884 * vm_map_swapin.
885 *
886 */
887 void vm_map_res_reference(register vm_map_t map)
888 {
889 /* assert map is locked */
890 assert(map->res_count >= 0);
891 assert(map->ref_count >= map->res_count);
892 if (map->res_count == 0) {
893 lck_mtx_unlock(&map->s_lock);
894 vm_map_lock(map);
895 vm_map_swapin(map);
896 lck_mtx_lock(&map->s_lock);
897 ++map->res_count;
898 vm_map_unlock(map);
899 } else
900 ++map->res_count;
901 }
902
903 /*
904 * vm_map_reference_swap:
905 *
906 * Adds valid reference and residence counts to the given map.
907 *
908 * The map may not be in memory (i.e. zero residence count).
909 *
910 */
911 void vm_map_reference_swap(register vm_map_t map)
912 {
913 assert(map != VM_MAP_NULL);
914 lck_mtx_lock(&map->s_lock);
915 assert(map->res_count >= 0);
916 assert(map->ref_count >= map->res_count);
917 map->ref_count++;
918 vm_map_res_reference(map);
919 lck_mtx_unlock(&map->s_lock);
920 }
921
922 /*
923 * vm_map_res_deallocate:
924 *
925 * Decrement residence count on a map; possibly causing swapout.
926 *
927 * The map must be in memory (i.e. non-zero residence count).
928 *
929 * The map is locked, so this function is callable from vm_map_deallocate.
930 *
931 */
932 void vm_map_res_deallocate(register vm_map_t map)
933 {
934 assert(map->res_count > 0);
935 if (--map->res_count == 0) {
936 lck_mtx_unlock(&map->s_lock);
937 vm_map_lock(map);
938 vm_map_swapout(map);
939 vm_map_unlock(map);
940 lck_mtx_lock(&map->s_lock);
941 }
942 assert(map->ref_count >= map->res_count);
943 }
944 #endif /* MACH_ASSERT && TASK_SWAPPER */
945
946 /*
947 * vm_map_destroy:
948 *
949 * Actually destroy a map.
950 */
951 void
952 vm_map_destroy(
953 vm_map_t map,
954 int flags)
955 {
956 vm_map_lock(map);
957
958 /* clean up regular map entries */
959 (void) vm_map_delete(map, map->min_offset, map->max_offset,
960 flags, VM_MAP_NULL);
961 /* clean up leftover special mappings (commpage, etc...) */
962 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
963 flags, VM_MAP_NULL);
964
965 #if CONFIG_FREEZE
966 if (map->default_freezer_handle) {
967 default_freezer_handle_deallocate(map->default_freezer_handle);
968 map->default_freezer_handle = NULL;
969 }
970 #endif
971 vm_map_unlock(map);
972
973 assert(map->hdr.nentries == 0);
974
975 if(map->pmap)
976 pmap_destroy(map->pmap);
977
978 zfree(vm_map_zone, map);
979 }
980
981 #if TASK_SWAPPER
982 /*
983 * vm_map_swapin/vm_map_swapout
984 *
985 * Swap a map in and out, either referencing or releasing its resources.
986 * These functions are internal use only; however, they must be exported
987 * because they may be called from macros, which are exported.
988 *
989 * In the case of swapout, there could be races on the residence count,
990 * so if the residence count is up, we return, assuming that a
991 * vm_map_deallocate() call in the near future will bring us back.
992 *
993 * Locking:
994 * -- We use the map write lock for synchronization among races.
995 * -- The map write lock, and not the simple s_lock, protects the
996 * swap state of the map.
997 * -- If a map entry is a share map, then we hold both locks, in
998 * hierarchical order.
999 *
1000 * Synchronization Notes:
1001 * 1) If a vm_map_swapin() call happens while swapout in progress, it
1002 * will block on the map lock and proceed when swapout is through.
1003 * 2) A vm_map_reference() call at this time is illegal, and will
1004 * cause a panic. vm_map_reference() is only allowed on resident
1005 * maps, since it refuses to block.
1006 * 3) A vm_map_swapin() call during a swapin will block, and
1007 * proceed when the first swapin is done, turning into a nop.
1008 * This is the reason the res_count is not incremented until
1009 * after the swapin is complete.
1010 * 4) There is a timing hole after the checks of the res_count, before
1011 * the map lock is taken, during which a swapin may get the lock
1012 * before a swapout about to happen. If this happens, the swapin
1013 * will detect the state and increment the reference count, causing
1014 * the swapout to be a nop, thereby delaying it until a later
1015 * vm_map_deallocate. If the swapout gets the lock first, then
1016 * the swapin will simply block until the swapout is done, and
1017 * then proceed.
1018 *
1019 * Because vm_map_swapin() is potentially an expensive operation, it
1020 * should be used with caution.
1021 *
1022 * Invariants:
1023 * 1) A map with a residence count of zero is either swapped, or
1024 * being swapped.
1025 * 2) A map with a non-zero residence count is either resident,
1026 * or being swapped in.
1027 */
1028
1029 int vm_map_swap_enable = 1;
1030
1031 void vm_map_swapin (vm_map_t map)
1032 {
1033 register vm_map_entry_t entry;
1034
1035 if (!vm_map_swap_enable) /* debug */
1036 return;
1037
1038 /*
1039 * Map is locked
1040 * First deal with various races.
1041 */
1042 if (map->sw_state == MAP_SW_IN)
1043 /*
1044 * we raced with swapout and won. Returning will incr.
1045 * the res_count, turning the swapout into a nop.
1046 */
1047 return;
1048
1049 /*
1050 * The residence count must be zero. If we raced with another
1051 * swapin, the state would have been IN; if we raced with a
1052 * swapout (after another competing swapin), we must have lost
1053 * the race to get here (see above comment), in which case
1054 * res_count is still 0.
1055 */
1056 assert(map->res_count == 0);
1057
1058 /*
1059 * There are no intermediate states of a map going out or
1060 * coming in, since the map is locked during the transition.
1061 */
1062 assert(map->sw_state == MAP_SW_OUT);
1063
1064 /*
1065 * We now operate upon each map entry. If the entry is a sub-
1066 * or share-map, we call vm_map_res_reference upon it.
1067 * If the entry is an object, we call vm_object_res_reference
1068 * (this may iterate through the shadow chain).
1069 * Note that we hold the map locked the entire time,
1070 * even if we get back here via a recursive call in
1071 * vm_map_res_reference.
1072 */
1073 entry = vm_map_first_entry(map);
1074
1075 while (entry != vm_map_to_entry(map)) {
1076 if (entry->object.vm_object != VM_OBJECT_NULL) {
1077 if (entry->is_sub_map) {
1078 vm_map_t lmap = entry->object.sub_map;
1079 lck_mtx_lock(&lmap->s_lock);
1080 vm_map_res_reference(lmap);
1081 lck_mtx_unlock(&lmap->s_lock);
1082 } else {
1083 vm_object_t object = entry->object.vm_object;
1084 vm_object_lock(object);
1085 /*
1086 * This call may iterate through the
1087 * shadow chain.
1088 */
1089 vm_object_res_reference(object);
1090 vm_object_unlock(object);
1091 }
1092 }
1093 entry = entry->vme_next;
1094 }
1095 assert(map->sw_state == MAP_SW_OUT);
1096 map->sw_state = MAP_SW_IN;
1097 }
1098
1099 void vm_map_swapout(vm_map_t map)
1100 {
1101 register vm_map_entry_t entry;
1102
1103 /*
1104 * Map is locked
1105 * First deal with various races.
1106 * If we raced with a swapin and lost, the residence count
1107 * will have been incremented to 1, and we simply return.
1108 */
1109 lck_mtx_lock(&map->s_lock);
1110 if (map->res_count != 0) {
1111 lck_mtx_unlock(&map->s_lock);
1112 return;
1113 }
1114 lck_mtx_unlock(&map->s_lock);
1115
1116 /*
1117 * There are no intermediate states of a map going out or
1118 * coming in, since the map is locked during the transition.
1119 */
1120 assert(map->sw_state == MAP_SW_IN);
1121
1122 if (!vm_map_swap_enable)
1123 return;
1124
1125 /*
1126 * We now operate upon each map entry. If the entry is a sub-
1127 * or share-map, we call vm_map_res_deallocate upon it.
1128 * If the entry is an object, we call vm_object_res_deallocate
1129 * (this may iterate through the shadow chain).
1130 * Note that we hold the map locked the entire time,
1131 * even if we get back here via a recursive call in
1132 * vm_map_res_deallocate.
1133 */
1134 entry = vm_map_first_entry(map);
1135
1136 while (entry != vm_map_to_entry(map)) {
1137 if (entry->object.vm_object != VM_OBJECT_NULL) {
1138 if (entry->is_sub_map) {
1139 vm_map_t lmap = entry->object.sub_map;
1140 lck_mtx_lock(&lmap->s_lock);
1141 vm_map_res_deallocate(lmap);
1142 lck_mtx_unlock(&lmap->s_lock);
1143 } else {
1144 vm_object_t object = entry->object.vm_object;
1145 vm_object_lock(object);
1146 /*
1147 * This call may take a long time,
1148 * since it could actively push
1149 * out pages (if we implement it
1150 * that way).
1151 */
1152 vm_object_res_deallocate(object);
1153 vm_object_unlock(object);
1154 }
1155 }
1156 entry = entry->vme_next;
1157 }
1158 assert(map->sw_state == MAP_SW_IN);
1159 map->sw_state = MAP_SW_OUT;
1160 }
1161
1162 #endif /* TASK_SWAPPER */
1163
1164 /*
1165 * vm_map_lookup_entry: [ internal use only ]
1166 *
1167 * Calls into the vm map store layer to find the map
1168 * entry containing (or immediately preceding) the
1169 * specified address in the given map; the entry is returned
1170 * in the "entry" parameter. The boolean
1171 * result indicates whether the address is
1172 * actually contained in the map.
1173 */
1174 boolean_t
1175 vm_map_lookup_entry(
1176 register vm_map_t map,
1177 register vm_map_offset_t address,
1178 vm_map_entry_t *entry) /* OUT */
1179 {
1180 return ( vm_map_store_lookup_entry( map, address, entry ));
1181 }
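
/*
 * Illustrative sketch (hypothetical helper): the usual lookup pattern,
 * as in vm_map_set_cache_attr() and vm_map_apple_protected() above --
 * take the map lock (shared here), look up the start address, and make
 * sure the returned entry actually covers the range before trusting it.
 */
static __unused boolean_t
example_range_is_single_mapping(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	vm_map_entry_t	entry;
	boolean_t	covered;

	vm_map_lock_read(map);
	covered = (vm_map_lookup_entry(map, start, &entry) &&
		   !entry->is_sub_map &&
		   entry->vme_end >= end);
	vm_map_unlock_read(map);

	return covered;
}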
1182
1183 /*
1184 * Routine: vm_map_find_space
1185 * Purpose:
1186 * Allocate a range in the specified virtual address map,
1187 * returning the entry allocated for that range.
1188 * Used by kmem_alloc, etc.
1189 *
1190 * The map must NOT be locked. It will be returned locked
1191 * on KERN_SUCCESS, unlocked on failure.
1192 *
1193 * If an entry is allocated, the object/offset fields
1194 * are initialized to zero.
1195 */
1196 kern_return_t
1197 vm_map_find_space(
1198 register vm_map_t map,
1199 vm_map_offset_t *address, /* OUT */
1200 vm_map_size_t size,
1201 vm_map_offset_t mask,
1202 int flags,
1203 vm_map_entry_t *o_entry) /* OUT */
1204 {
1205 register vm_map_entry_t entry, new_entry;
1206 register vm_map_offset_t start;
1207 register vm_map_offset_t end;
1208
1209 if (size == 0) {
1210 *address = 0;
1211 return KERN_INVALID_ARGUMENT;
1212 }
1213
1214 if (flags & VM_FLAGS_GUARD_AFTER) {
1215 /* account for the back guard page in the size */
1216 size += PAGE_SIZE_64;
1217 }
1218
1219 new_entry = vm_map_entry_create(map, FALSE);
1220
1221 /*
1222 * Look for the first possible address; if there's already
1223 * something at this address, we have to start after it.
1224 */
1225
1226 vm_map_lock(map);
1227
1228 if( map->disable_vmentry_reuse == TRUE) {
1229 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1230 } else {
1231 assert(first_free_is_valid(map));
1232 if ((entry = map->first_free) == vm_map_to_entry(map))
1233 start = map->min_offset;
1234 else
1235 start = entry->vme_end;
1236 }
1237
1238 /*
1239 * In any case, the "entry" always precedes
1240 * the proposed new region throughout the loop:
1241 */
1242
1243 while (TRUE) {
1244 register vm_map_entry_t next;
1245
1246 /*
1247 * Find the end of the proposed new region.
1248 * Be sure we didn't go beyond the end, or
1249 * wrap around the address.
1250 */
1251
1252 if (flags & VM_FLAGS_GUARD_BEFORE) {
1253 /* reserve space for the front guard page */
1254 start += PAGE_SIZE_64;
1255 }
1256 end = ((start + mask) & ~mask);
1257
1258 if (end < start) {
1259 vm_map_entry_dispose(map, new_entry);
1260 vm_map_unlock(map);
1261 return(KERN_NO_SPACE);
1262 }
1263 start = end;
1264 end += size;
1265
1266 if ((end > map->max_offset) || (end < start)) {
1267 vm_map_entry_dispose(map, new_entry);
1268 vm_map_unlock(map);
1269 return(KERN_NO_SPACE);
1270 }
1271
1272 /*
1273 * If there are no more entries, we must win.
1274 */
1275
1276 next = entry->vme_next;
1277 if (next == vm_map_to_entry(map))
1278 break;
1279
1280 /*
1281 * If there is another entry, it must be
1282 * after the end of the potential new region.
1283 */
1284
1285 if (next->vme_start >= end)
1286 break;
1287
1288 /*
1289 * Didn't fit -- move to the next entry.
1290 */
1291
1292 entry = next;
1293 start = entry->vme_end;
1294 }
1295
1296 /*
1297 * At this point,
1298 * "start" and "end" should define the endpoints of the
1299 * available new range, and
1300 * "entry" should refer to the region before the new
1301 * range, and
1302 *
1303 * the map should be locked.
1304 */
1305
1306 if (flags & VM_FLAGS_GUARD_BEFORE) {
1307 /* go back for the front guard page */
1308 start -= PAGE_SIZE_64;
1309 }
1310 *address = start;
1311
1312 assert(start < end);
1313 new_entry->vme_start = start;
1314 new_entry->vme_end = end;
1315 assert(page_aligned(new_entry->vme_start));
1316 assert(page_aligned(new_entry->vme_end));
1317
1318 new_entry->is_shared = FALSE;
1319 new_entry->is_sub_map = FALSE;
1320 new_entry->use_pmap = FALSE;
1321 new_entry->object.vm_object = VM_OBJECT_NULL;
1322 new_entry->offset = (vm_object_offset_t) 0;
1323
1324 new_entry->needs_copy = FALSE;
1325
1326 new_entry->inheritance = VM_INHERIT_DEFAULT;
1327 new_entry->protection = VM_PROT_DEFAULT;
1328 new_entry->max_protection = VM_PROT_ALL;
1329 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1330 new_entry->wired_count = 0;
1331 new_entry->user_wired_count = 0;
1332
1333 new_entry->in_transition = FALSE;
1334 new_entry->needs_wakeup = FALSE;
1335 new_entry->no_cache = FALSE;
1336 new_entry->permanent = FALSE;
1337 new_entry->superpage_size = 0;
1338
1339 new_entry->used_for_jit = 0;
1340
1341 new_entry->alias = 0;
1342 new_entry->zero_wired_pages = FALSE;
1343
1344 VM_GET_FLAGS_ALIAS(flags, new_entry->alias);
1345
1346 /*
1347 * Insert the new entry into the list
1348 */
1349
1350 vm_map_store_entry_link(map, entry, new_entry);
1351
1352 map->size += size;
1353
1354 /*
1355 * Update the lookup hint
1356 */
1357 SAVE_HINT_MAP_WRITE(map, new_entry);
1358
1359 *o_entry = new_entry;
1360 return(KERN_SUCCESS);
1361 }
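
/*
 * Illustrative sketch (hypothetical helper, loosely following the
 * kmem_alloc-style callers mentioned above): on KERN_SUCCESS the map
 * comes back locked and the new entry has a null object, so the caller
 * fills in its own object/offset and then drops the lock.  Reference
 * counting on "object" is assumed to be handled by the caller.
 */
static __unused kern_return_t
example_carve_out_kernel_range(
	vm_map_t		map,
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_map_size_t		size,
	vm_map_offset_t		*addr)		/* OUT */
{
	vm_map_entry_t	entry;
	kern_return_t	kr;

	kr = vm_map_find_space(map, addr, size, (vm_map_offset_t) 0, 0, &entry);
	if (kr != KERN_SUCCESS)
		return kr;

	entry->object.vm_object = object;
	entry->offset = offset;

	vm_map_unlock(map);	/* vm_map_find_space() returned it locked */
	return KERN_SUCCESS;
}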
1362
1363 int vm_map_pmap_enter_print = FALSE;
1364 int vm_map_pmap_enter_enable = FALSE;
1365
1366 /*
1367 * Routine: vm_map_pmap_enter [internal only]
1368 *
1369 * Description:
1370 * Force pages from the specified object to be entered into
1371 * the pmap at the specified address if they are present.
1372 * As soon as a page is not found in the object, the scan ends.
1373 *
1374 * Returns:
1375 * Nothing.
1376 *
1377 * In/out conditions:
1378 * The source map should not be locked on entry.
1379 */
1380 static void
1381 vm_map_pmap_enter(
1382 vm_map_t map,
1383 register vm_map_offset_t addr,
1384 register vm_map_offset_t end_addr,
1385 register vm_object_t object,
1386 vm_object_offset_t offset,
1387 vm_prot_t protection)
1388 {
1389 int type_of_fault;
1390 kern_return_t kr;
1391
1392 if(map->pmap == 0)
1393 return;
1394
1395 while (addr < end_addr) {
1396 register vm_page_t m;
1397
1398 vm_object_lock(object);
1399
1400 m = vm_page_lookup(object, offset);
1401 /*
1402 * ENCRYPTED SWAP:
1403 * The user should never see encrypted data, so do not
1404 * enter an encrypted page in the page table.
1405 */
1406 if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
1407 m->fictitious ||
1408 (m->unusual && ( m->error || m->restart || m->absent))) {
1409 vm_object_unlock(object);
1410 return;
1411 }
1412
1413 if (vm_map_pmap_enter_print) {
1414 printf("vm_map_pmap_enter:");
1415 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1416 map, (unsigned long long)addr, object, (unsigned long long)offset);
1417 }
1418 type_of_fault = DBG_CACHE_HIT_FAULT;
1419 kr = vm_fault_enter(m, map->pmap, addr, protection, protection,
1420 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, NULL,
1421 &type_of_fault);
1422
1423 vm_object_unlock(object);
1424
1425 offset += PAGE_SIZE_64;
1426 addr += PAGE_SIZE;
1427 }
1428 }
1429
1430 boolean_t vm_map_pmap_is_empty(
1431 vm_map_t map,
1432 vm_map_offset_t start,
1433 vm_map_offset_t end);
1434 boolean_t vm_map_pmap_is_empty(
1435 vm_map_t map,
1436 vm_map_offset_t start,
1437 vm_map_offset_t end)
1438 {
1439 #ifdef MACHINE_PMAP_IS_EMPTY
1440 return pmap_is_empty(map->pmap, start, end);
1441 #else /* MACHINE_PMAP_IS_EMPTY */
1442 vm_map_offset_t offset;
1443 ppnum_t phys_page;
1444
1445 if (map->pmap == NULL) {
1446 return TRUE;
1447 }
1448
1449 for (offset = start;
1450 offset < end;
1451 offset += PAGE_SIZE) {
1452 phys_page = pmap_find_phys(map->pmap, offset);
1453 if (phys_page) {
1454 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1455 "page %d at 0x%llx\n",
1456 map, (long long)start, (long long)end,
1457 phys_page, (long long)offset);
1458 return FALSE;
1459 }
1460 }
1461 return TRUE;
1462 #endif /* MACHINE_PMAP_IS_EMPTY */
1463 }
1464
1465 #define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
1466 kern_return_t
1467 vm_map_random_address_for_size(
1468 vm_map_t map,
1469 vm_map_offset_t *address,
1470 vm_map_size_t size)
1471 {
1472 kern_return_t kr = KERN_SUCCESS;
1473 int tries = 0;
1474 vm_map_offset_t random_addr = 0;
1475 vm_map_offset_t hole_end;
1476
1477 vm_map_entry_t next_entry = VM_MAP_ENTRY_NULL;
1478 vm_map_entry_t prev_entry = VM_MAP_ENTRY_NULL;
1479 vm_map_size_t vm_hole_size = 0;
1480 vm_map_size_t addr_space_size;
1481
1482 addr_space_size = vm_map_max(map) - vm_map_min(map);
1483
1484 assert(page_aligned(size));
1485
1486 while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1487 random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
1488 random_addr = trunc_page(vm_map_min(map) +
1489 (random_addr % addr_space_size));
1490
1491 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
1492 if (prev_entry == vm_map_to_entry(map)) {
1493 next_entry = vm_map_first_entry(map);
1494 } else {
1495 next_entry = prev_entry->vme_next;
1496 }
1497 if (next_entry == vm_map_to_entry(map)) {
1498 hole_end = vm_map_max(map);
1499 } else {
1500 hole_end = next_entry->vme_start;
1501 }
1502 vm_hole_size = hole_end - random_addr;
1503 if (vm_hole_size >= size) {
1504 *address = random_addr;
1505 break;
1506 }
1507 }
1508 tries++;
1509 }
1510
1511 if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1512 kr = KERN_NO_SPACE;
1513 }
1514 return kr;
1515 }
1516
1517 /*
1518 * Routine: vm_map_enter
1519 *
1520 * Description:
1521 * Allocate a range in the specified virtual address map.
1522 * The resulting range will refer to memory defined by
1523 * the given memory object and offset into that object.
1524 *
1525 * Arguments are as defined in the vm_map call.
1526 */
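
/*
 * Illustrative sketch (hypothetical helper): the simplest use of
 * vm_map_enter() below -- an anonymous, anywhere allocation with default
 * protections, essentially what vm_allocate() reduces to.
 */
static __unused kern_return_t
example_allocate_anonymous(
	vm_map_t	map,
	vm_map_offset_t	*addr,		/* IN/OUT */
	vm_map_size_t	size)
{
	return vm_map_enter(map,
			    addr,
			    size,
			    (vm_map_offset_t) 0,	/* mask */
			    VM_FLAGS_ANYWHERE,
			    VM_OBJECT_NULL,
			    (vm_object_offset_t) 0,
			    FALSE,			/* needs_copy */
			    VM_PROT_DEFAULT,
			    VM_PROT_ALL,
			    VM_INHERIT_DEFAULT);
}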
1527 int _map_enter_debug = 0;
1528 static unsigned int vm_map_enter_restore_successes = 0;
1529 static unsigned int vm_map_enter_restore_failures = 0;
1530 kern_return_t
1531 vm_map_enter(
1532 vm_map_t map,
1533 vm_map_offset_t *address, /* IN/OUT */
1534 vm_map_size_t size,
1535 vm_map_offset_t mask,
1536 int flags,
1537 vm_object_t object,
1538 vm_object_offset_t offset,
1539 boolean_t needs_copy,
1540 vm_prot_t cur_protection,
1541 vm_prot_t max_protection,
1542 vm_inherit_t inheritance)
1543 {
1544 vm_map_entry_t entry, new_entry;
1545 vm_map_offset_t start, tmp_start, tmp_offset;
1546 vm_map_offset_t end, tmp_end;
1547 vm_map_offset_t tmp2_start, tmp2_end;
1548 vm_map_offset_t step;
1549 kern_return_t result = KERN_SUCCESS;
1550 vm_map_t zap_old_map = VM_MAP_NULL;
1551 vm_map_t zap_new_map = VM_MAP_NULL;
1552 boolean_t map_locked = FALSE;
1553 boolean_t pmap_empty = TRUE;
1554 boolean_t new_mapping_established = FALSE;
1555 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1556 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1557 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
1558 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
1559 boolean_t is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
1560 boolean_t permanent = ((flags & VM_FLAGS_PERMANENT) != 0);
1561 boolean_t entry_for_jit = ((flags & VM_FLAGS_MAP_JIT) != 0);
1562 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
1563 char alias;
1564 vm_map_offset_t effective_min_offset, effective_max_offset;
1565 kern_return_t kr;
1566
1567 if (superpage_size) {
1568 switch (superpage_size) {
1569 /*
1570 * Note that the current implementation only supports
1571 * a single size for superpages, SUPERPAGE_SIZE, per
1572 * architecture. As soon as more sizes are to be supported,
1573 * SUPERPAGE_SIZE has to be replaced
1574 * with a lookup of the size depending on superpage_size.
1575 */
1576 #ifdef __x86_64__
1577 case SUPERPAGE_SIZE_ANY:
1578 /* handle it like 2 MB and round up to page size */
1579 size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
1580 case SUPERPAGE_SIZE_2MB:
1581 break;
1582 #endif
1583 default:
1584 return KERN_INVALID_ARGUMENT;
1585 }
1586 mask = SUPERPAGE_SIZE-1;
1587 if (size & (SUPERPAGE_SIZE-1))
1588 return KERN_INVALID_ARGUMENT;
1589 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
1590 }
1591
1592
1593 #if CONFIG_EMBEDDED
1594 if (cur_protection & VM_PROT_WRITE){
1595 if ((cur_protection & VM_PROT_EXECUTE) && !entry_for_jit){
1596 printf("EMBEDDED: %s curprot cannot be write+execute. turning off execute\n", __PRETTY_FUNCTION__);
1597 cur_protection &= ~VM_PROT_EXECUTE;
1598 }
1599 }
1600 #endif /* CONFIG_EMBEDDED */
1601
1602 if (is_submap) {
1603 if (purgable) {
1604 /* submaps can not be purgeable */
1605 return KERN_INVALID_ARGUMENT;
1606 }
1607 if (object == VM_OBJECT_NULL) {
1608 /* submaps can not be created lazily */
1609 return KERN_INVALID_ARGUMENT;
1610 }
1611 }
1612 if (flags & VM_FLAGS_ALREADY) {
1613 /*
1614 * VM_FLAGS_ALREADY says that it's OK if the same mapping
1615 * is already present. For it to be meaningful, the requested
1616 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
1617 * we shouldn't try to remove what was mapped there first
1618 * (!VM_FLAGS_OVERWRITE).
1619 */
1620 if ((flags & VM_FLAGS_ANYWHERE) ||
1621 (flags & VM_FLAGS_OVERWRITE)) {
1622 return KERN_INVALID_ARGUMENT;
1623 }
1624 }
1625
1626 effective_min_offset = map->min_offset;
1627
1628 if (flags & VM_FLAGS_BEYOND_MAX) {
1629 /*
1630 * Allow an insertion beyond the map's max offset.
1631 */
1632 if (vm_map_is_64bit(map))
1633 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
1634 else
1635 effective_max_offset = 0x00000000FFFFF000ULL;
1636 } else {
1637 effective_max_offset = map->max_offset;
1638 }
1639
1640 if (size == 0 ||
1641 (offset & PAGE_MASK_64) != 0) {
1642 *address = 0;
1643 return KERN_INVALID_ARGUMENT;
1644 }
1645
1646 VM_GET_FLAGS_ALIAS(flags, alias);
1647
1648 #define RETURN(value) { result = value; goto BailOut; }
1649
1650 assert(page_aligned(*address));
1651 assert(page_aligned(size));
1652
1653 /*
1654 * Only zero-fill objects are allowed to be purgable.
1655 * LP64todo - limit purgable objects to 32-bits for now
1656 */
1657 if (purgable &&
1658 (offset != 0 ||
1659 (object != VM_OBJECT_NULL &&
1660 (object->vo_size != size ||
1661 object->purgable == VM_PURGABLE_DENY))
1662 || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
1663 return KERN_INVALID_ARGUMENT;
1664
1665 if (!anywhere && overwrite) {
1666 /*
1667 * Create a temporary VM map to hold the old mappings in the
1668 * affected area while we create the new one.
1669 * This avoids releasing the VM map lock in
1670 * vm_map_entry_delete() and allows atomicity
1671 * when we want to replace some mappings with a new one.
1672 * It also allows us to restore the old VM mappings if the
1673 * new mapping fails.
1674 */
1675 zap_old_map = vm_map_create(PMAP_NULL,
1676 *address,
1677 *address + size,
1678 map->hdr.entries_pageable);
1679 }
1680
1681 StartAgain: ;
1682
1683 start = *address;
1684
1685 if (anywhere) {
1686 vm_map_lock(map);
1687 map_locked = TRUE;
1688
1689 if (entry_for_jit) {
1690 if (map->jit_entry_exists) {
1691 result = KERN_INVALID_ARGUMENT;
1692 goto BailOut;
1693 }
1694 /*
1695 * Get a random start address.
1696 */
1697 result = vm_map_random_address_for_size(map, address, size);
1698 if (result != KERN_SUCCESS) {
1699 goto BailOut;
1700 }
1701 start = *address;
1702 }
1703
1704
1705 /*
1706 * Calculate the first possible address.
1707 */
1708
1709 if (start < effective_min_offset)
1710 start = effective_min_offset;
1711 if (start > effective_max_offset)
1712 RETURN(KERN_NO_SPACE);
1713
1714 /*
1715 * Look for the first possible address;
1716 * if there's already something at this
1717 * address, we have to start after it.
1718 */
1719
1720 if( map->disable_vmentry_reuse == TRUE) {
1721 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1722 } else {
1723 assert(first_free_is_valid(map));
1724
1725 entry = map->first_free;
1726
1727 if (entry == vm_map_to_entry(map)) {
1728 entry = NULL;
1729 } else {
1730 if (entry->vme_next == vm_map_to_entry(map)){
1731 /*
1732 * Hole at the end of the map.
1733 */
1734 entry = NULL;
1735 } else {
1736 if (start < (entry->vme_next)->vme_start ) {
1737 start = entry->vme_end;
1738 } else {
1739 /*
1740 * Need to do a lookup.
1741 */
1742 entry = NULL;
1743 }
1744 }
1745 }
1746
1747 if (entry == NULL) {
1748 vm_map_entry_t tmp_entry;
1749 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
1750 assert(!entry_for_jit);
1751 start = tmp_entry->vme_end;
1752 }
1753 entry = tmp_entry;
1754 }
1755 }
1756
1757 /*
1758 * In any case, the "entry" always precedes
1759 * the proposed new region throughout the
1760 * loop:
1761 */
1762
1763 while (TRUE) {
1764 register vm_map_entry_t next;
1765
1766 /*
1767 * Find the end of the proposed new region.
1768 * Be sure we didn't go beyond the end, or
1769 * wrap around the address.
1770 */
1771
1772 end = ((start + mask) & ~mask);
1773 if (end < start)
1774 RETURN(KERN_NO_SPACE);
1775 start = end;
1776 end += size;
1777
1778 if ((end > effective_max_offset) || (end < start)) {
1779 if (map->wait_for_space) {
1780 if (size <= (effective_max_offset -
1781 effective_min_offset)) {
1782 assert_wait((event_t)map,
1783 THREAD_ABORTSAFE);
1784 vm_map_unlock(map);
1785 map_locked = FALSE;
1786 thread_block(THREAD_CONTINUE_NULL);
1787 goto StartAgain;
1788 }
1789 }
1790 RETURN(KERN_NO_SPACE);
1791 }
1792
1793 /*
1794 * If there are no more entries, we must win.
1795 */
1796
1797 next = entry->vme_next;
1798 if (next == vm_map_to_entry(map))
1799 break;
1800
1801 /*
1802 * If there is another entry, it must be
1803 * after the end of the potential new region.
1804 */
1805
1806 if (next->vme_start >= end)
1807 break;
1808
1809 /*
1810 * Didn't fit -- move to the next entry.
1811 */
1812
1813 entry = next;
1814 start = entry->vme_end;
1815 }
1816 *address = start;
1817 } else {
1818 /*
1819 * Verify that:
1820 * the address doesn't itself violate
1821 * the mask requirement.
1822 */
1823
1824 vm_map_lock(map);
1825 map_locked = TRUE;
1826 if ((start & mask) != 0)
1827 RETURN(KERN_NO_SPACE);
1828
1829 /*
1830 * ... the address is within bounds
1831 */
1832
1833 end = start + size;
1834
1835 if ((start < effective_min_offset) ||
1836 (end > effective_max_offset) ||
1837 (start >= end)) {
1838 RETURN(KERN_INVALID_ADDRESS);
1839 }
1840
1841 if (overwrite && zap_old_map != VM_MAP_NULL) {
1842 /*
1843 * Fixed mapping and "overwrite" flag: attempt to
1844 * remove all existing mappings in the specified
1845 * address range, saving them in our "zap_old_map".
1846 */
1847 (void) vm_map_delete(map, start, end,
1848 VM_MAP_REMOVE_SAVE_ENTRIES,
1849 zap_old_map);
1850 }
1851
1852 /*
1853 * ... the starting address isn't allocated
1854 */
1855
1856 if (vm_map_lookup_entry(map, start, &entry)) {
1857 if (! (flags & VM_FLAGS_ALREADY)) {
1858 RETURN(KERN_NO_SPACE);
1859 }
1860 /*
1861 * Check if what's already there is what we want.
1862 */
1863 tmp_start = start;
1864 tmp_offset = offset;
1865 if (entry->vme_start < start) {
1866 tmp_start -= start - entry->vme_start;
1867 tmp_offset -= start - entry->vme_start;
1868
1869 }
1870 for (; entry->vme_start < end;
1871 entry = entry->vme_next) {
1872 /*
1873 * Check if the mapping's attributes
1874 * match the existing map entry.
1875 */
1876 if (entry == vm_map_to_entry(map) ||
1877 entry->vme_start != tmp_start ||
1878 entry->is_sub_map != is_submap ||
1879 entry->offset != tmp_offset ||
1880 entry->needs_copy != needs_copy ||
1881 entry->protection != cur_protection ||
1882 entry->max_protection != max_protection ||
1883 entry->inheritance != inheritance ||
1884 entry->alias != alias) {
1885 /* not the same mapping ! */
1886 RETURN(KERN_NO_SPACE);
1887 }
1888 /*
1889 * Check if the same object is being mapped.
1890 */
1891 if (is_submap) {
1892 if (entry->object.sub_map !=
1893 (vm_map_t) object) {
1894 /* not the same submap */
1895 RETURN(KERN_NO_SPACE);
1896 }
1897 } else {
1898 if (entry->object.vm_object != object) {
1899 /* not the same VM object... */
1900 vm_object_t obj2;
1901
1902 obj2 = entry->object.vm_object;
1903 if ((obj2 == VM_OBJECT_NULL ||
1904 obj2->internal) &&
1905 (object == VM_OBJECT_NULL ||
1906 object->internal)) {
1907 /*
1908 * ... but both are
1909 * anonymous memory,
1910 * so equivalent.
1911 */
1912 } else {
1913 RETURN(KERN_NO_SPACE);
1914 }
1915 }
1916 }
1917
1918 tmp_offset += entry->vme_end - entry->vme_start;
1919 tmp_start += entry->vme_end - entry->vme_start;
1920 if (entry->vme_end >= end) {
1921 /* reached the end of our mapping */
1922 break;
1923 }
1924 }
1925 /* it all matches: let's use what's already there ! */
1926 RETURN(KERN_MEMORY_PRESENT);
1927 }
1928
1929 /*
1930 * ... the next region doesn't overlap the
1931 * end point.
1932 */
1933
1934 if ((entry->vme_next != vm_map_to_entry(map)) &&
1935 (entry->vme_next->vme_start < end))
1936 RETURN(KERN_NO_SPACE);
1937 }
1938
1939 /*
1940 * At this point,
1941 * "start" and "end" should define the endpoints of the
1942 * available new range, and
1943 * "entry" should refer to the region before the new
1944 * range, and
1945 *
1946 * the map should be locked.
1947 */
1948
1949 /*
1950 * See whether we can avoid creating a new entry (and object) by
1951 * extending one of our neighbors. [So far, we only attempt to
1952 * extend from below.] Note that we can never extend/join
1953 * purgable objects because they need to remain distinct
1954 * entities in order to implement their "volatile object"
1955 * semantics.
1956 */
1957
1958 if (purgable || entry_for_jit) {
1959 if (object == VM_OBJECT_NULL) {
1960 object = vm_object_allocate(size);
1961 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
1962 if (purgable) {
1963 object->purgable = VM_PURGABLE_NONVOLATILE;
1964 }
1965 offset = (vm_object_offset_t)0;
1966 }
1967 } else if ((is_submap == FALSE) &&
1968 (object == VM_OBJECT_NULL) &&
1969 (entry != vm_map_to_entry(map)) &&
1970 (entry->vme_end == start) &&
1971 (!entry->is_shared) &&
1972 (!entry->is_sub_map) &&
1973 ((alias == VM_MEMORY_REALLOC) || (entry->alias == alias)) &&
1974 (entry->inheritance == inheritance) &&
1975 (entry->protection == cur_protection) &&
1976 (entry->max_protection == max_protection) &&
1977 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
1978 (entry->in_transition == 0) &&
1979 (entry->no_cache == no_cache) &&
1980 ((entry->vme_end - entry->vme_start) + size <=
1981 (alias == VM_MEMORY_REALLOC ?
1982 ANON_CHUNK_SIZE :
1983 NO_COALESCE_LIMIT)) &&
1984 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
1985 if (vm_object_coalesce(entry->object.vm_object,
1986 VM_OBJECT_NULL,
1987 entry->offset,
1988 (vm_object_offset_t) 0,
1989 (vm_map_size_t)(entry->vme_end - entry->vme_start),
1990 (vm_map_size_t)(end - entry->vme_end))) {
1991
1992 /*
1993 * Coalesced the two objects - can extend
1994 * the previous map entry to include the
1995 * new range.
1996 */
1997 map->size += (end - entry->vme_end);
1998 assert(entry->vme_start < end);
1999 entry->vme_end = end;
2000 vm_map_store_update_first_free(map, map->first_free);
2001 RETURN(KERN_SUCCESS);
2002 }
2003 }
2004
2005 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2006 new_entry = NULL;
2007
2008 for (tmp2_start = start; tmp2_start<end; tmp2_start += step) {
2009 tmp2_end = tmp2_start + step;
2010 /*
2011 * Create a new entry
2012 * LP64todo - for now, we can only allocate 4GB internal objects
2013 * because the default pager can't page bigger ones. Remove this
2014 * when it can.
2015 *
2016 * XXX FBDP
2017 * The reserved "page zero" in each process's address space can
2018 * be arbitrarily large. Splitting it into separate 4GB objects and
2019 * therefore different VM map entries serves no purpose and just
2020 * slows down operations on the VM map, so let's not split the
2021 * allocation into 4GB chunks if the max protection is NONE. That
2022 * memory should never be accessible, so it will never get to the
2023 * default pager.
2024 */
2025 tmp_start = tmp2_start;
2026 if (object == VM_OBJECT_NULL &&
2027 size > (vm_map_size_t)ANON_CHUNK_SIZE &&
2028 max_protection != VM_PROT_NONE &&
2029 superpage_size == 0)
2030 tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE;
2031 else
2032 tmp_end = tmp2_end;
2033 do {
2034 new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
2035 object, offset, needs_copy,
2036 FALSE, FALSE,
2037 cur_protection, max_protection,
2038 VM_BEHAVIOR_DEFAULT,
2039 (entry_for_jit)? VM_INHERIT_NONE: inheritance,
2040 0, no_cache,
2041 permanent, superpage_size);
2042 new_entry->alias = alias;
2043 if (entry_for_jit){
2044 if (!(map->jit_entry_exists)){
2045 new_entry->used_for_jit = TRUE;
2046 map->jit_entry_exists = TRUE;
2047 }
2048 }
2049
2050 if (is_submap) {
2051 vm_map_t submap;
2052 boolean_t submap_is_64bit;
2053 boolean_t use_pmap;
2054
2055 new_entry->is_sub_map = TRUE;
2056 submap = (vm_map_t) object;
2057 submap_is_64bit = vm_map_is_64bit(submap);
2058 use_pmap = (alias == VM_MEMORY_SHARED_PMAP);
2059 #ifndef NO_NESTED_PMAP
2060 if (use_pmap && submap->pmap == NULL) {
2061 ledger_t ledger = map->pmap->ledger;
2062 /* we need a sub pmap to nest... */
2063 submap->pmap = pmap_create(ledger, 0,
2064 submap_is_64bit);
2065 if (submap->pmap == NULL) {
2066 /* let's proceed without nesting... */
2067 }
2068 }
2069 if (use_pmap && submap->pmap != NULL) {
2070 kr = pmap_nest(map->pmap,
2071 submap->pmap,
2072 tmp_start,
2073 tmp_start,
2074 tmp_end - tmp_start);
2075 if (kr != KERN_SUCCESS) {
2076 printf("vm_map_enter: "
2077 "pmap_nest(0x%llx,0x%llx) "
2078 "error 0x%x\n",
2079 (long long)tmp_start,
2080 (long long)tmp_end,
2081 kr);
2082 } else {
2083 /* we're now nested ! */
2084 new_entry->use_pmap = TRUE;
2085 pmap_empty = FALSE;
2086 }
2087 }
2088 #endif /* NO_NESTED_PMAP */
2089 }
2090 entry = new_entry;
2091
2092 if (superpage_size) {
2093 vm_page_t pages, m;
2094 vm_object_t sp_object;
2095
2096 entry->offset = 0;
2097
2098 /* allocate one superpage */
2099 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
2100 if (kr != KERN_SUCCESS) {
2101 new_mapping_established = TRUE; /* will cause deallocation of whole range */
2102 RETURN(kr);
2103 }
2104
2105 /* create one vm_object per superpage */
2106 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
2107 sp_object->phys_contiguous = TRUE;
2108 sp_object->vo_shadow_offset = (vm_object_offset_t)pages->phys_page*PAGE_SIZE;
2109 entry->object.vm_object = sp_object;
2110
2111 /* enter the base pages into the object */
2112 vm_object_lock(sp_object);
2113 for (offset = 0; offset < SUPERPAGE_SIZE; offset += PAGE_SIZE) {
2114 m = pages;
2115 pmap_zero_page(m->phys_page);
2116 pages = NEXT_PAGE(m);
2117 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2118 vm_page_insert(m, sp_object, offset);
2119 }
2120 vm_object_unlock(sp_object);
2121 }
2122 } while (tmp_end != tmp2_end &&
2123 (tmp_start = tmp_end) &&
2124 (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ?
2125 tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end));
2126 }
2127
2128 vm_map_unlock(map);
2129 map_locked = FALSE;
2130
2131 new_mapping_established = TRUE;
2132
2133 /* Wire down the new entry if the user
2134 * requested all new map entries be wired.
2135 */
2136 if ((map->wiring_required)||(superpage_size)) {
2137 pmap_empty = FALSE; /* pmap won't be empty */
2138 kr = vm_map_wire(map, start, end,
2139 new_entry->protection, TRUE);
2140 RETURN(kr);
2141 }
2142
2143 if ((object != VM_OBJECT_NULL) &&
2144 (vm_map_pmap_enter_enable) &&
2145 (!anywhere) &&
2146 (!needs_copy) &&
2147 (size < (128*1024))) {
2148 pmap_empty = FALSE; /* pmap won't be empty */
2149
2150 if (override_nx(map, alias) && cur_protection)
2151 cur_protection |= VM_PROT_EXECUTE;
2152
2153 vm_map_pmap_enter(map, start, end,
2154 object, offset, cur_protection);
2155 }
2156
2157 BailOut: ;
2158 if (result == KERN_SUCCESS) {
2159 vm_prot_t pager_prot;
2160 memory_object_t pager;
2161
2162 if (pmap_empty &&
2163 !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
2164 assert(vm_map_pmap_is_empty(map,
2165 *address,
2166 *address+size));
2167 }
2168
2169 /*
2170 * For "named" VM objects, let the pager know that the
2171 * memory object is being mapped. Some pagers need to keep
2172 * track of this, to know when they can reclaim the memory
2173 * object, for example.
2174 * VM calls memory_object_map() for each mapping (specifying
2175 * the protection of each mapping) and calls
2176 * memory_object_last_unmap() when all the mappings are gone.
2177 */
2178 pager_prot = max_protection;
2179 if (needs_copy) {
2180 /*
2181 * Copy-On-Write mapping: won't modify
2182 * the memory object.
2183 */
2184 pager_prot &= ~VM_PROT_WRITE;
2185 }
2186 if (!is_submap &&
2187 object != VM_OBJECT_NULL &&
2188 object->named &&
2189 object->pager != MEMORY_OBJECT_NULL) {
2190 vm_object_lock(object);
2191 pager = object->pager;
2192 if (object->named &&
2193 pager != MEMORY_OBJECT_NULL) {
2194 assert(object->pager_ready);
2195 vm_object_mapping_wait(object, THREAD_UNINT);
2196 vm_object_mapping_begin(object);
2197 vm_object_unlock(object);
2198
2199 kr = memory_object_map(pager, pager_prot);
2200 assert(kr == KERN_SUCCESS);
2201
2202 vm_object_lock(object);
2203 vm_object_mapping_end(object);
2204 }
2205 vm_object_unlock(object);
2206 }
2207 } else {
2208 if (new_mapping_established) {
2209 /*
2210 * We have to get rid of the new mappings since we
2211 * won't make them available to the user.
2212 * Try to do that atomically, to minimize the risk
2213 * that someone else creates new mappings in that range.
2214 */
2215 zap_new_map = vm_map_create(PMAP_NULL,
2216 *address,
2217 *address + size,
2218 map->hdr.entries_pageable);
2219 if (!map_locked) {
2220 vm_map_lock(map);
2221 map_locked = TRUE;
2222 }
2223 (void) vm_map_delete(map, *address, *address+size,
2224 VM_MAP_REMOVE_SAVE_ENTRIES,
2225 zap_new_map);
2226 }
2227 if (zap_old_map != VM_MAP_NULL &&
2228 zap_old_map->hdr.nentries != 0) {
2229 vm_map_entry_t entry1, entry2;
2230
2231 /*
2232 * The new mapping failed. Attempt to restore
2233 * the old mappings, saved in the "zap_old_map".
2234 */
2235 if (!map_locked) {
2236 vm_map_lock(map);
2237 map_locked = TRUE;
2238 }
2239
2240 /* first check if the coast is still clear */
2241 start = vm_map_first_entry(zap_old_map)->vme_start;
2242 end = vm_map_last_entry(zap_old_map)->vme_end;
2243 if (vm_map_lookup_entry(map, start, &entry1) ||
2244 vm_map_lookup_entry(map, end, &entry2) ||
2245 entry1 != entry2) {
2246 /*
2247 * Part of that range has already been
2248 * re-mapped: we can't restore the old
2249 * mappings...
2250 */
2251 vm_map_enter_restore_failures++;
2252 } else {
2253 /*
2254 * Transfer the saved map entries from
2255 * "zap_old_map" to the original "map",
2256 * inserting them all after "entry1".
2257 */
2258 for (entry2 = vm_map_first_entry(zap_old_map);
2259 entry2 != vm_map_to_entry(zap_old_map);
2260 entry2 = vm_map_first_entry(zap_old_map)) {
2261 vm_map_size_t entry_size;
2262
2263 entry_size = (entry2->vme_end -
2264 entry2->vme_start);
2265 vm_map_store_entry_unlink(zap_old_map,
2266 entry2);
2267 zap_old_map->size -= entry_size;
2268 vm_map_store_entry_link(map, entry1, entry2);
2269 map->size += entry_size;
2270 entry1 = entry2;
2271 }
2272 if (map->wiring_required) {
2273 /*
2274 * XXX TODO: we should rewire the
2275 * old pages here...
2276 */
2277 }
2278 vm_map_enter_restore_successes++;
2279 }
2280 }
2281 }
2282
2283 if (map_locked) {
2284 vm_map_unlock(map);
2285 }
2286
2287 /*
2288 * Get rid of the "zap_maps" and all the map entries that
2289 * they may still contain.
2290 */
2291 if (zap_old_map != VM_MAP_NULL) {
2292 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2293 zap_old_map = VM_MAP_NULL;
2294 }
2295 if (zap_new_map != VM_MAP_NULL) {
2296 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2297 zap_new_map = VM_MAP_NULL;
2298 }
2299
2300 return result;
2301
2302 #undef RETURN
2303 }
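/*
 * A minimal usage sketch for vm_map_enter() (illustrative only, not
 * called from this file): reserving anonymous, zero-filled memory.
 * Passing VM_OBJECT_NULL lets the VM supply an internal object lazily,
 * and VM_FLAGS_ANYWHERE takes the "anywhere" allocation path above.
 * "map" and "size" stand for whatever the caller already has in hand.
 *
 *	vm_map_offset_t	addr = 0;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_enter(map, &addr, size,
 *			  (vm_map_offset_t) 0,		(no alignment mask)
 *			  VM_FLAGS_ANYWHERE,
 *			  VM_OBJECT_NULL,		(anonymous memory)
 *			  (vm_object_offset_t) 0,
 *			  FALSE,			(no copy-on-write)
 *			  VM_PROT_DEFAULT, VM_PROT_ALL,
 *			  VM_INHERIT_DEFAULT);
 */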
2304
2305 kern_return_t
2306 vm_map_enter_mem_object(
2307 vm_map_t target_map,
2308 vm_map_offset_t *address,
2309 vm_map_size_t initial_size,
2310 vm_map_offset_t mask,
2311 int flags,
2312 ipc_port_t port,
2313 vm_object_offset_t offset,
2314 boolean_t copy,
2315 vm_prot_t cur_protection,
2316 vm_prot_t max_protection,
2317 vm_inherit_t inheritance)
2318 {
2319 vm_map_address_t map_addr;
2320 vm_map_size_t map_size;
2321 vm_object_t object;
2322 vm_object_size_t size;
2323 kern_return_t result;
2324 boolean_t mask_cur_protection, mask_max_protection;
2325
2326 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
2327 mask_max_protection = max_protection & VM_PROT_IS_MASK;
2328 cur_protection &= ~VM_PROT_IS_MASK;
2329 max_protection &= ~VM_PROT_IS_MASK;
2330
2331 /*
2332 * Check arguments for validity
2333 */
2334 if ((target_map == VM_MAP_NULL) ||
2335 (cur_protection & ~VM_PROT_ALL) ||
2336 (max_protection & ~VM_PROT_ALL) ||
2337 (inheritance > VM_INHERIT_LAST_VALID) ||
2338 initial_size == 0)
2339 return KERN_INVALID_ARGUMENT;
2340
2341 map_addr = vm_map_trunc_page(*address);
2342 map_size = vm_map_round_page(initial_size);
2343 size = vm_object_round_page(initial_size);
2344
2345 /*
2346 * Find the vm object (if any) corresponding to this port.
2347 */
2348 if (!IP_VALID(port)) {
2349 object = VM_OBJECT_NULL;
2350 offset = 0;
2351 copy = FALSE;
2352 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
2353 vm_named_entry_t named_entry;
2354
2355 named_entry = (vm_named_entry_t) port->ip_kobject;
2356 /* a few checks to make sure user is obeying rules */
2357 if (size == 0) {
2358 if (offset >= named_entry->size)
2359 return KERN_INVALID_RIGHT;
2360 size = named_entry->size - offset;
2361 }
2362 if (mask_max_protection) {
2363 max_protection &= named_entry->protection;
2364 }
2365 if (mask_cur_protection) {
2366 cur_protection &= named_entry->protection;
2367 }
2368 if ((named_entry->protection & max_protection) !=
2369 max_protection)
2370 return KERN_INVALID_RIGHT;
2371 if ((named_entry->protection & cur_protection) !=
2372 cur_protection)
2373 return KERN_INVALID_RIGHT;
2374 if (named_entry->size < (offset + size))
2375 return KERN_INVALID_ARGUMENT;
2376
2377 /* the caller's "offset" parameter is relative to the start */
2378 /* of the named entry; convert it to an offset in the object */
2379 offset = offset + named_entry->offset;
2380
2381 named_entry_lock(named_entry);
2382 if (named_entry->is_sub_map) {
2383 vm_map_t submap;
2384
2385 submap = named_entry->backing.map;
2386 vm_map_lock(submap);
2387 vm_map_reference(submap);
2388 vm_map_unlock(submap);
2389 named_entry_unlock(named_entry);
2390
2391 result = vm_map_enter(target_map,
2392 &map_addr,
2393 map_size,
2394 mask,
2395 flags | VM_FLAGS_SUBMAP,
2396 (vm_object_t) submap,
2397 offset,
2398 copy,
2399 cur_protection,
2400 max_protection,
2401 inheritance);
2402 if (result != KERN_SUCCESS) {
2403 vm_map_deallocate(submap);
2404 } else {
2405 /*
2406 * No need to lock "submap" just to check its
2407 * "mapped" flag: that flag is never reset
2408 * once it's been set and if we race, we'll
2409 * just end up setting it twice, which is OK.
2410 */
2411 if (submap->mapped_in_other_pmaps == FALSE &&
2412 vm_map_pmap(submap) != PMAP_NULL &&
2413 vm_map_pmap(submap) !=
2414 vm_map_pmap(target_map)) {
2415 /*
2416 * This submap is being mapped in a map
2417 * that uses a different pmap.
2418 * Set its "mapped_in_other_pmaps" flag
2419 * to indicate that we now need to
2420 * remove mappings from all pmaps rather
2421 * than just the submap's pmap.
2422 */
2423 vm_map_lock(submap);
2424 submap->mapped_in_other_pmaps = TRUE;
2425 vm_map_unlock(submap);
2426 }
2427 *address = map_addr;
2428 }
2429 return result;
2430
2431 } else if (named_entry->is_pager) {
2432 unsigned int access;
2433 vm_prot_t protections;
2434 unsigned int wimg_mode;
2435
2436 protections = named_entry->protection & VM_PROT_ALL;
2437 access = GET_MAP_MEM(named_entry->protection);
2438
2439 object = vm_object_enter(named_entry->backing.pager,
2440 named_entry->size,
2441 named_entry->internal,
2442 FALSE,
2443 FALSE);
2444 if (object == VM_OBJECT_NULL) {
2445 named_entry_unlock(named_entry);
2446 return KERN_INVALID_OBJECT;
2447 }
2448
2449 /* JMM - drop reference on pager here */
2450
2451 /* create an extra ref for the named entry */
2452 vm_object_lock(object);
2453 vm_object_reference_locked(object);
2454 named_entry->backing.object = object;
2455 named_entry->is_pager = FALSE;
2456 named_entry_unlock(named_entry);
2457
2458 wimg_mode = object->wimg_bits;
2459
2460 if (access == MAP_MEM_IO) {
2461 wimg_mode = VM_WIMG_IO;
2462 } else if (access == MAP_MEM_COPYBACK) {
2463 wimg_mode = VM_WIMG_USE_DEFAULT;
2464 } else if (access == MAP_MEM_INNERWBACK) {
2465 wimg_mode = VM_WIMG_INNERWBACK;
2466 } else if (access == MAP_MEM_WTHRU) {
2467 wimg_mode = VM_WIMG_WTHRU;
2468 } else if (access == MAP_MEM_WCOMB) {
2469 wimg_mode = VM_WIMG_WCOMB;
2470 }
2471
2472 /* wait for object (if any) to be ready */
2473 if (!named_entry->internal) {
2474 while (!object->pager_ready) {
2475 vm_object_wait(
2476 object,
2477 VM_OBJECT_EVENT_PAGER_READY,
2478 THREAD_UNINT);
2479 vm_object_lock(object);
2480 }
2481 }
2482
2483 if (object->wimg_bits != wimg_mode)
2484 vm_object_change_wimg_mode(object, wimg_mode);
2485
2486 object->true_share = TRUE;
2487
2488 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
2489 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
2490 vm_object_unlock(object);
2491 } else {
2492 /* This is the case where we are going to map */
2493 /* an already mapped object. If the object is */
2494 /* not ready, it is internal. An external */
2495 /* object cannot be mapped until it is ready, */
2496 /* so we can avoid the ready check */
2497 /* in this case. */
2498 object = named_entry->backing.object;
2499 assert(object != VM_OBJECT_NULL);
2500 named_entry_unlock(named_entry);
2501 vm_object_reference(object);
2502 }
2503 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
2504 /*
2505 * JMM - This is temporary until we unify named entries
2506 * and raw memory objects.
2507 *
2508 * Detected fake ip_kotype for a memory object. In
2509 * this case, the port isn't really a port at all, but
2510 * instead is just a raw memory object.
2511 */
2512
2513 object = vm_object_enter((memory_object_t)port,
2514 size, FALSE, FALSE, FALSE);
2515 if (object == VM_OBJECT_NULL)
2516 return KERN_INVALID_OBJECT;
2517
2518 /* wait for object (if any) to be ready */
2519 if (object != VM_OBJECT_NULL) {
2520 if (object == kernel_object) {
2521 printf("Warning: Attempt to map kernel object"
2522 " by a non-private kernel entity\n");
2523 return KERN_INVALID_OBJECT;
2524 }
2525 if (!object->pager_ready) {
2526 vm_object_lock(object);
2527
2528 while (!object->pager_ready) {
2529 vm_object_wait(object,
2530 VM_OBJECT_EVENT_PAGER_READY,
2531 THREAD_UNINT);
2532 vm_object_lock(object);
2533 }
2534 vm_object_unlock(object);
2535 }
2536 }
2537 } else {
2538 return KERN_INVALID_OBJECT;
2539 }
2540
2541 if (object != VM_OBJECT_NULL &&
2542 object->named &&
2543 object->pager != MEMORY_OBJECT_NULL &&
2544 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2545 memory_object_t pager;
2546 vm_prot_t pager_prot;
2547 kern_return_t kr;
2548
2549 /*
2550 * For "named" VM objects, let the pager know that the
2551 * memory object is being mapped. Some pagers need to keep
2552 * track of this, to know when they can reclaim the memory
2553 * object, for example.
2554 * VM calls memory_object_map() for each mapping (specifying
2555 * the protection of each mapping) and calls
2556 * memory_object_last_unmap() when all the mappings are gone.
2557 */
2558 pager_prot = max_protection;
2559 if (copy) {
2560 /*
2561 * Copy-On-Write mapping: won't modify the
2562 * memory object.
2563 */
2564 pager_prot &= ~VM_PROT_WRITE;
2565 }
2566 vm_object_lock(object);
2567 pager = object->pager;
2568 if (object->named &&
2569 pager != MEMORY_OBJECT_NULL &&
2570 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2571 assert(object->pager_ready);
2572 vm_object_mapping_wait(object, THREAD_UNINT);
2573 vm_object_mapping_begin(object);
2574 vm_object_unlock(object);
2575
2576 kr = memory_object_map(pager, pager_prot);
2577 assert(kr == KERN_SUCCESS);
2578
2579 vm_object_lock(object);
2580 vm_object_mapping_end(object);
2581 }
2582 vm_object_unlock(object);
2583 }
2584
2585 /*
2586 * Perform the copy if requested
2587 */
2588
2589 if (copy) {
2590 vm_object_t new_object;
2591 vm_object_offset_t new_offset;
2592
2593 result = vm_object_copy_strategically(object, offset, size,
2594 &new_object, &new_offset,
2595 &copy);
2596
2597
2598 if (result == KERN_MEMORY_RESTART_COPY) {
2599 boolean_t success;
2600 boolean_t src_needs_copy;
2601
2602 /*
2603 * XXX
2604 * We currently ignore src_needs_copy.
2605 * This really is the issue of how to make
2606 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
2607 * non-kernel users to use. Solution forthcoming.
2608 * In the meantime, since we don't allow non-kernel
2609 * memory managers to specify symmetric copy,
2610 * we won't run into problems here.
2611 */
2612 new_object = object;
2613 new_offset = offset;
2614 success = vm_object_copy_quickly(&new_object,
2615 new_offset, size,
2616 &src_needs_copy,
2617 &copy);
2618 assert(success);
2619 result = KERN_SUCCESS;
2620 }
2621 /*
2622 * Throw away the reference to the
2623 * original object, as it won't be mapped.
2624 */
2625
2626 vm_object_deallocate(object);
2627
2628 if (result != KERN_SUCCESS)
2629 return result;
2630
2631 object = new_object;
2632 offset = new_offset;
2633 }
2634
2635 result = vm_map_enter(target_map,
2636 &map_addr, map_size,
2637 (vm_map_offset_t)mask,
2638 flags,
2639 object, offset,
2640 copy,
2641 cur_protection, max_protection, inheritance);
2642 if (result != KERN_SUCCESS)
2643 vm_object_deallocate(object);
2644 *address = map_addr;
2645 return result;
2646 }
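/*
 * Rough sketch of the named-entry path that lands here (illustrative
 * only; user space normally reaches this through mach_vm_map() and the
 * MIG glue rather than calling it directly).  "src_map", "src_addr",
 * "dst_map" and "size" are placeholders for the caller's state.
 *
 *	ipc_port_t		entry_port;
 *	memory_object_size_t	entry_size = size;
 *	vm_map_offset_t		addr = 0;
 *
 *	kr = mach_make_memory_entry_64(src_map, &entry_size,
 *				       src_addr, VM_PROT_READ,
 *				       &entry_port, IP_NULL);
 *	kr = vm_map_enter_mem_object(dst_map, &addr, entry_size,
 *				     0, VM_FLAGS_ANYWHERE,
 *				     entry_port, 0, FALSE,
 *				     VM_PROT_READ, VM_PROT_READ,
 *				     VM_INHERIT_NONE);
 */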
2647
2648
2649
2650
2651 kern_return_t
2652 vm_map_enter_mem_object_control(
2653 vm_map_t target_map,
2654 vm_map_offset_t *address,
2655 vm_map_size_t initial_size,
2656 vm_map_offset_t mask,
2657 int flags,
2658 memory_object_control_t control,
2659 vm_object_offset_t offset,
2660 boolean_t copy,
2661 vm_prot_t cur_protection,
2662 vm_prot_t max_protection,
2663 vm_inherit_t inheritance)
2664 {
2665 vm_map_address_t map_addr;
2666 vm_map_size_t map_size;
2667 vm_object_t object;
2668 vm_object_size_t size;
2669 kern_return_t result;
2670 memory_object_t pager;
2671 vm_prot_t pager_prot;
2672 kern_return_t kr;
2673
2674 /*
2675 * Check arguments for validity
2676 */
2677 if ((target_map == VM_MAP_NULL) ||
2678 (cur_protection & ~VM_PROT_ALL) ||
2679 (max_protection & ~VM_PROT_ALL) ||
2680 (inheritance > VM_INHERIT_LAST_VALID) ||
2681 initial_size == 0)
2682 return KERN_INVALID_ARGUMENT;
2683
2684 map_addr = vm_map_trunc_page(*address);
2685 map_size = vm_map_round_page(initial_size);
2686 size = vm_object_round_page(initial_size);
2687
2688 object = memory_object_control_to_vm_object(control);
2689
2690 if (object == VM_OBJECT_NULL)
2691 return KERN_INVALID_OBJECT;
2692
2693 if (object == kernel_object) {
2694 printf("Warning: Attempt to map kernel object"
2695 " by a non-private kernel entity\n");
2696 return KERN_INVALID_OBJECT;
2697 }
2698
2699 vm_object_lock(object);
2700 object->ref_count++;
2701 vm_object_res_reference(object);
2702
2703 /*
2704 * For "named" VM objects, let the pager know that the
2705 * memory object is being mapped. Some pagers need to keep
2706 * track of this, to know when they can reclaim the memory
2707 * object, for example.
2708 * VM calls memory_object_map() for each mapping (specifying
2709 * the protection of each mapping) and calls
2710 * memory_object_last_unmap() when all the mappings are gone.
2711 */
2712 pager_prot = max_protection;
2713 if (copy) {
2714 pager_prot &= ~VM_PROT_WRITE;
2715 }
2716 pager = object->pager;
2717 if (object->named &&
2718 pager != MEMORY_OBJECT_NULL &&
2719 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2720 assert(object->pager_ready);
2721 vm_object_mapping_wait(object, THREAD_UNINT);
2722 vm_object_mapping_begin(object);
2723 vm_object_unlock(object);
2724
2725 kr = memory_object_map(pager, pager_prot);
2726 assert(kr == KERN_SUCCESS);
2727
2728 vm_object_lock(object);
2729 vm_object_mapping_end(object);
2730 }
2731 vm_object_unlock(object);
2732
2733 /*
2734 * Perform the copy if requested
2735 */
2736
2737 if (copy) {
2738 vm_object_t new_object;
2739 vm_object_offset_t new_offset;
2740
2741 result = vm_object_copy_strategically(object, offset, size,
2742 &new_object, &new_offset,
2743 &copy);
2744
2745
2746 if (result == KERN_MEMORY_RESTART_COPY) {
2747 boolean_t success;
2748 boolean_t src_needs_copy;
2749
2750 /*
2751 * XXX
2752 * We currently ignore src_needs_copy.
2753 * This really is the issue of how to make
2754 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
2755 * non-kernel users to use. Solution forthcoming.
2756 * In the meantime, since we don't allow non-kernel
2757 * memory managers to specify symmetric copy,
2758 * we won't run into problems here.
2759 */
2760 new_object = object;
2761 new_offset = offset;
2762 success = vm_object_copy_quickly(&new_object,
2763 new_offset, size,
2764 &src_needs_copy,
2765 &copy);
2766 assert(success);
2767 result = KERN_SUCCESS;
2768 }
2769 /*
2770 * Throw away the reference to the
2771 * original object, as it won't be mapped.
2772 */
2773
2774 vm_object_deallocate(object);
2775
2776 if (result != KERN_SUCCESS)
2777 return result;
2778
2779 object = new_object;
2780 offset = new_offset;
2781 }
2782
2783 result = vm_map_enter(target_map,
2784 &map_addr, map_size,
2785 (vm_map_offset_t)mask,
2786 flags,
2787 object, offset,
2788 copy,
2789 cur_protection, max_protection, inheritance);
2790 if (result != KERN_SUCCESS)
2791 vm_object_deallocate(object);
2792 *address = map_addr;
2793
2794 return result;
2795 }
2796
2797
2798 #if VM_CPM
2799
2800 #ifdef MACH_ASSERT
2801 extern pmap_paddr_t avail_start, avail_end;
2802 #endif
2803
2804 /*
2805 * Allocate memory in the specified map, with the caveat that
2806 * the memory is physically contiguous. This call may fail
2807 * if the system can't find sufficient contiguous memory.
2808 * This call may cause or lead to heart-stopping amounts of
2809 * paging activity.
2810 *
2811 * Memory obtained from this call should be freed in the
2812 * normal way, viz., via vm_deallocate.
2813 */
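/*
 * A minimal sketch of the expected lifetime (illustrative only; the
 * size is rounded to a page multiple internally, as below):
 *
 *	vm_map_offset_t	addr = 0;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_enter_cpm(kernel_map, &addr, size, VM_FLAGS_ANYWHERE);
 *	if (kr == KERN_SUCCESS) {
 *		... use the physically contiguous range at "addr" ...
 *		(void) vm_deallocate(kernel_map, addr, size);
 *	}
 */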
2814 kern_return_t
2815 vm_map_enter_cpm(
2816 vm_map_t map,
2817 vm_map_offset_t *addr,
2818 vm_map_size_t size,
2819 int flags)
2820 {
2821 vm_object_t cpm_obj;
2822 pmap_t pmap;
2823 vm_page_t m, pages;
2824 kern_return_t kr;
2825 vm_map_offset_t va, start, end, offset;
2826 #if MACH_ASSERT
2827 vm_map_offset_t prev_addr = 0;
2828 #endif /* MACH_ASSERT */
2829
2830 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
2831
2832 if (size == 0) {
2833 *addr = 0;
2834 return KERN_SUCCESS;
2835 }
2836 if (anywhere)
2837 *addr = vm_map_min(map);
2838 else
2839 *addr = vm_map_trunc_page(*addr);
2840 size = vm_map_round_page(size);
2841
2842 /*
2843 * LP64todo - cpm_allocate should probably allow
2844 * allocations of >4GB, but not with the current
2845 * algorithm, so just cast down the size for now.
2846 */
2847 if (size > VM_MAX_ADDRESS)
2848 return KERN_RESOURCE_SHORTAGE;
2849 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
2850 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)
2851 return kr;
2852
2853 cpm_obj = vm_object_allocate((vm_object_size_t)size);
2854 assert(cpm_obj != VM_OBJECT_NULL);
2855 assert(cpm_obj->internal);
2856 assert(cpm_obj->vo_size == (vm_object_size_t)size);
2857 assert(cpm_obj->can_persist == FALSE);
2858 assert(cpm_obj->pager_created == FALSE);
2859 assert(cpm_obj->pageout == FALSE);
2860 assert(cpm_obj->shadow == VM_OBJECT_NULL);
2861
2862 /*
2863 * Insert pages into object.
2864 */
2865
2866 vm_object_lock(cpm_obj);
2867 for (offset = 0; offset < size; offset += PAGE_SIZE) {
2868 m = pages;
2869 pages = NEXT_PAGE(m);
2870 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2871
2872 assert(!m->gobbled);
2873 assert(!m->wanted);
2874 assert(!m->pageout);
2875 assert(!m->tabled);
2876 assert(VM_PAGE_WIRED(m));
2877 /*
2878 * ENCRYPTED SWAP:
2879 * "m" is not supposed to be pageable, so it
2880 * should not be encrypted. It wouldn't be safe
2881 * to enter it in a new VM object while encrypted.
2882 */
2883 ASSERT_PAGE_DECRYPTED(m);
2884 assert(m->busy);
2885 assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT));
2886
2887 m->busy = FALSE;
2888 vm_page_insert(m, cpm_obj, offset);
2889 }
2890 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
2891 vm_object_unlock(cpm_obj);
2892
2893 /*
2894 * Hang onto a reference on the object in case a
2895 * multi-threaded application for some reason decides
2896 * to deallocate the portion of the address space into
2897 * which we will insert this object.
2898 *
2899 * Unfortunately, we must insert the object now before
2900 * we can talk to the pmap module about which addresses
2901 * must be wired down. Hence, the race with a multi-
2902 * threaded app.
2903 */
2904 vm_object_reference(cpm_obj);
2905
2906 /*
2907 * Insert object into map.
2908 */
2909
2910 kr = vm_map_enter(
2911 map,
2912 addr,
2913 size,
2914 (vm_map_offset_t)0,
2915 flags,
2916 cpm_obj,
2917 (vm_object_offset_t)0,
2918 FALSE,
2919 VM_PROT_ALL,
2920 VM_PROT_ALL,
2921 VM_INHERIT_DEFAULT);
2922
2923 if (kr != KERN_SUCCESS) {
2924 /*
2925 * A CPM object doesn't have can_persist set,
2926 * so all we have to do is deallocate it to
2927 * free up these pages.
2928 */
2929 assert(cpm_obj->pager_created == FALSE);
2930 assert(cpm_obj->can_persist == FALSE);
2931 assert(cpm_obj->pageout == FALSE);
2932 assert(cpm_obj->shadow == VM_OBJECT_NULL);
2933 vm_object_deallocate(cpm_obj); /* kill acquired ref */
2934 vm_object_deallocate(cpm_obj); /* kill creation ref */
2935 }
2936
2937 /*
2938 * Inform the physical mapping system that the
2939 * range of addresses may not fault, so that
2940 * page tables and such can be locked down as well.
2941 */
2942 start = *addr;
2943 end = start + size;
2944 pmap = vm_map_pmap(map);
2945 pmap_pageable(pmap, start, end, FALSE);
2946
2947 /*
2948 * Enter each page into the pmap, to avoid faults.
2949 * Note that this loop could be coded more efficiently,
2950 * if the need arose, rather than looking up each page
2951 * again.
2952 */
2953 for (offset = 0, va = start; offset < size;
2954 va += PAGE_SIZE, offset += PAGE_SIZE) {
2955 int type_of_fault;
2956
2957 vm_object_lock(cpm_obj);
2958 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2959 assert(m != VM_PAGE_NULL);
2960
2961 vm_page_zero_fill(m);
2962
2963 type_of_fault = DBG_ZERO_FILL_FAULT;
2964
2965 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
2966 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, NULL,
2967 &type_of_fault);
2968
2969 vm_object_unlock(cpm_obj);
2970 }
2971
2972 #if MACH_ASSERT
2973 /*
2974 * Verify ordering in address space.
2975 */
2976 for (offset = 0; offset < size; offset += PAGE_SIZE) {
2977 vm_object_lock(cpm_obj);
2978 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
2979 vm_object_unlock(cpm_obj);
2980 if (m == VM_PAGE_NULL)
2981 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
2982 cpm_obj, (uint64_t)offset);
2983 assert(m->tabled);
2984 assert(!m->busy);
2985 assert(!m->wanted);
2986 assert(!m->fictitious);
2987 assert(!m->private);
2988 assert(!m->absent);
2989 assert(!m->error);
2990 assert(!m->cleaning);
2991 assert(!m->laundry);
2992 assert(!m->precious);
2993 assert(!m->clustered);
2994 if (offset != 0) {
2995 if (m->phys_page != prev_addr + 1) {
2996 printf("start 0x%llx end 0x%llx va 0x%llx\n",
2997 (uint64_t)start, (uint64_t)end, (uint64_t)va);
2998 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
2999 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
3000 panic("vm_allocate_cpm: pages not contig!");
3001 }
3002 }
3003 prev_addr = m->phys_page;
3004 }
3005 #endif /* MACH_ASSERT */
3006
3007 vm_object_deallocate(cpm_obj); /* kill extra ref */
3008
3009 return kr;
3010 }
3011
3012
3013 #else /* VM_CPM */
3014
3015 /*
3016 * Interface is defined in all cases, but unless the kernel
3017 * is built explicitly for this option, the interface does
3018 * nothing.
3019 */
3020
3021 kern_return_t
3022 vm_map_enter_cpm(
3023 __unused vm_map_t map,
3024 __unused vm_map_offset_t *addr,
3025 __unused vm_map_size_t size,
3026 __unused int flags)
3027 {
3028 return KERN_FAILURE;
3029 }
3030 #endif /* VM_CPM */
3031
3032 /* Not used without nested pmaps */
3033 #ifndef NO_NESTED_PMAP
3034 /*
3035 * Clip and unnest a portion of a nested submap mapping.
3036 */
3037
3038
3039 static void
3040 vm_map_clip_unnest(
3041 vm_map_t map,
3042 vm_map_entry_t entry,
3043 vm_map_offset_t start_unnest,
3044 vm_map_offset_t end_unnest)
3045 {
3046 vm_map_offset_t old_start_unnest = start_unnest;
3047 vm_map_offset_t old_end_unnest = end_unnest;
3048
3049 assert(entry->is_sub_map);
3050 assert(entry->object.sub_map != NULL);
3051
3052 /*
3053 * Query the platform for the optimal unnest range.
3054 * DRK: There's some duplication of effort here, since
3055 * callers may have adjusted the range to some extent. This
3056 * routine was introduced to support 1GiB subtree nesting
3057 * for x86 platforms, which can also nest on 2MiB boundaries
3058 * depending on size/alignment.
3059 */
3060 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
3061 log_unnest_badness(map, old_start_unnest, old_end_unnest);
3062 }
3063
3064 if (entry->vme_start > start_unnest ||
3065 entry->vme_end < end_unnest) {
3066 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
3067 "bad nested entry: start=0x%llx end=0x%llx\n",
3068 (long long)start_unnest, (long long)end_unnest,
3069 (long long)entry->vme_start, (long long)entry->vme_end);
3070 }
3071
3072 if (start_unnest > entry->vme_start) {
3073 _vm_map_clip_start(&map->hdr,
3074 entry,
3075 start_unnest);
3076 vm_map_store_update_first_free(map, map->first_free);
3077 }
3078 if (entry->vme_end > end_unnest) {
3079 _vm_map_clip_end(&map->hdr,
3080 entry,
3081 end_unnest);
3082 vm_map_store_update_first_free(map, map->first_free);
3083 }
3084
3085 pmap_unnest(map->pmap,
3086 entry->vme_start,
3087 entry->vme_end - entry->vme_start);
3088 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
3089 /* clean up parent map/maps */
3090 vm_map_submap_pmap_clean(
3091 map, entry->vme_start,
3092 entry->vme_end,
3093 entry->object.sub_map,
3094 entry->offset);
3095 }
3096 entry->use_pmap = FALSE;
3097 if (entry->alias == VM_MEMORY_SHARED_PMAP) {
3098 entry->alias = VM_MEMORY_UNSHARED_PMAP;
3099 }
3100 }
3101 #endif /* NO_NESTED_PMAP */
3102
3103 /*
3104 * vm_map_clip_start: [ internal use only ]
3105 *
3106 * Asserts that the given entry begins at or after
3107 * the specified address; if necessary,
3108 * it splits the entry into two.
3109 */
3110 void
3111 vm_map_clip_start(
3112 vm_map_t map,
3113 vm_map_entry_t entry,
3114 vm_map_offset_t startaddr)
3115 {
3116 #ifndef NO_NESTED_PMAP
3117 if (entry->use_pmap &&
3118 startaddr >= entry->vme_start) {
3119 vm_map_offset_t start_unnest, end_unnest;
3120
3121 /*
3122 * Make sure "startaddr" is no longer in a nested range
3123 * before we clip. Unnest only the minimum range the platform
3124 * can handle.
3125 * vm_map_clip_unnest may perform additional adjustments to
3126 * the unnest range.
3127 */
3128 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
3129 end_unnest = start_unnest + pmap_nesting_size_min;
3130 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
3131 }
3132 #endif /* NO_NESTED_PMAP */
3133 if (startaddr > entry->vme_start) {
3134 if (entry->object.vm_object &&
3135 !entry->is_sub_map &&
3136 entry->object.vm_object->phys_contiguous) {
3137 pmap_remove(map->pmap,
3138 (addr64_t)(entry->vme_start),
3139 (addr64_t)(entry->vme_end));
3140 }
3141 _vm_map_clip_start(&map->hdr, entry, startaddr);
3142 vm_map_store_update_first_free(map, map->first_free);
3143 }
3144 }
3145
3146
3147 #define vm_map_copy_clip_start(copy, entry, startaddr) \
3148 MACRO_BEGIN \
3149 if ((startaddr) > (entry)->vme_start) \
3150 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
3151 MACRO_END
3152
3153 /*
3154 * This routine is called only when it is known that
3155 * the entry must be split.
3156 */
3157 static void
3158 _vm_map_clip_start(
3159 register struct vm_map_header *map_header,
3160 register vm_map_entry_t entry,
3161 register vm_map_offset_t start)
3162 {
3163 register vm_map_entry_t new_entry;
3164
3165 /*
3166 * Split off the front portion --
3167 * note that we must insert the new
3168 * entry BEFORE this one, so that
3169 * this entry has the specified starting
3170 * address.
3171 */
3172
3173 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
3174 vm_map_entry_copy_full(new_entry, entry);
3175
3176 new_entry->vme_end = start;
3177 assert(new_entry->vme_start < new_entry->vme_end);
3178 entry->offset += (start - entry->vme_start);
3179 assert(start < entry->vme_end);
3180 entry->vme_start = start;
3181
3182 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
3183
3184 if (entry->is_sub_map)
3185 vm_map_reference(new_entry->object.sub_map);
3186 else
3187 vm_object_reference(new_entry->object.vm_object);
3188 }
3189
3190
3191 /*
3192 * vm_map_clip_end: [ internal use only ]
3193 *
3194 * Asserts that the given entry ends at or before
3195 * the specified address; if necessary,
3196 * it splits the entry into two.
3197 */
3198 void
3199 vm_map_clip_end(
3200 vm_map_t map,
3201 vm_map_entry_t entry,
3202 vm_map_offset_t endaddr)
3203 {
3204 if (endaddr > entry->vme_end) {
3205 /*
3206 * Within the scope of this clipping, limit "endaddr" to
3207 * the end of this map entry...
3208 */
3209 endaddr = entry->vme_end;
3210 }
3211 #ifndef NO_NESTED_PMAP
3212 if (entry->use_pmap) {
3213 vm_map_offset_t start_unnest, end_unnest;
3214
3215 /*
3216 * Make sure the range between the start of this entry and
3217 * the new "endaddr" is no longer nested before we clip.
3218 * Unnest only the minimum range the platform can handle.
3219 * vm_map_clip_unnest may perform additional adjustments to
3220 * the unnest range.
3221 */
3222 start_unnest = entry->vme_start;
3223 end_unnest =
3224 (endaddr + pmap_nesting_size_min - 1) &
3225 ~(pmap_nesting_size_min - 1);
3226 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
3227 }
3228 #endif /* NO_NESTED_PMAP */
3229 if (endaddr < entry->vme_end) {
3230 if (entry->object.vm_object &&
3231 !entry->is_sub_map &&
3232 entry->object.vm_object->phys_contiguous) {
3233 pmap_remove(map->pmap,
3234 (addr64_t)(entry->vme_start),
3235 (addr64_t)(entry->vme_end));
3236 }
3237 _vm_map_clip_end(&map->hdr, entry, endaddr);
3238 vm_map_store_update_first_free(map, map->first_free);
3239 }
3240 }
3241
3242
3243 #define vm_map_copy_clip_end(copy, entry, endaddr) \
3244 MACRO_BEGIN \
3245 if ((endaddr) < (entry)->vme_end) \
3246 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
3247 MACRO_END
3248
3249 /*
3250 * This routine is called only when it is known that
3251 * the entry must be split.
3252 */
3253 static void
3254 _vm_map_clip_end(
3255 register struct vm_map_header *map_header,
3256 register vm_map_entry_t entry,
3257 register vm_map_offset_t end)
3258 {
3259 register vm_map_entry_t new_entry;
3260
3261 /*
3262 * Create a new entry and insert it
3263 * AFTER the specified entry
3264 */
3265
3266 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
3267 vm_map_entry_copy_full(new_entry, entry);
3268
3269 assert(entry->vme_start < end);
3270 new_entry->vme_start = entry->vme_end = end;
3271 new_entry->offset += (end - entry->vme_start);
3272 assert(new_entry->vme_start < new_entry->vme_end);
3273
3274 _vm_map_store_entry_link(map_header, entry, new_entry);
3275
3276 if (entry->is_sub_map)
3277 vm_map_reference(new_entry->object.sub_map);
3278 else
3279 vm_object_reference(new_entry->object.vm_object);
3280 }
3281
3282
3283 /*
3284 * VM_MAP_RANGE_CHECK: [ internal use only ]
3285 *
3286 * Asserts that the starting and ending region
3287 * addresses fall within the valid range of the map.
3288 */
3289 #define VM_MAP_RANGE_CHECK(map, start, end) \
3290 MACRO_BEGIN \
3291 if (start < vm_map_min(map)) \
3292 start = vm_map_min(map); \
3293 if (end > vm_map_max(map)) \
3294 end = vm_map_max(map); \
3295 if (start > end) \
3296 start = end; \
3297 MACRO_END
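/*
 * For example (illustrative numbers): in a map whose valid range is
 * [0x1000, 0xF000), a request for (start 0x0, end 0x10000) is clamped
 * to (0x1000, 0xF000), and a request entirely above the map, such as
 * (0x20000, 0x30000), degenerates to the empty range (0xF000, 0xF000).
 */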
3298
3299 /*
3300 * vm_map_range_check: [ internal use only ]
3301 *
3302 * Check that the region defined by the specified start and
3303 * end addresses is wholly contained within a single map
3304 * entry or set of adjacent map entries of the specified map,
3305 * i.e. the specified region contains no unmapped space.
3306 * If any or all of the region is unmapped, FALSE is returned.
3307 * Otherwise, TRUE is returned and if the output argument 'entry'
3308 * is not NULL it points to the map entry containing the start
3309 * of the region.
3310 *
3311 * The map is locked for reading on entry and is left locked.
3312 */
3313 static boolean_t
3314 vm_map_range_check(
3315 register vm_map_t map,
3316 register vm_map_offset_t start,
3317 register vm_map_offset_t end,
3318 vm_map_entry_t *entry)
3319 {
3320 vm_map_entry_t cur;
3321 register vm_map_offset_t prev;
3322
3323 /*
3324 * Basic sanity checks first
3325 */
3326 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
3327 return (FALSE);
3328
3329 /*
3330 * Check first if the region starts within a valid
3331 * mapping for the map.
3332 */
3333 if (!vm_map_lookup_entry(map, start, &cur))
3334 return (FALSE);
3335
3336 /*
3337 * Optimize for the case that the region is contained
3338 * in a single map entry.
3339 */
3340 if (entry != (vm_map_entry_t *) NULL)
3341 *entry = cur;
3342 if (end <= cur->vme_end)
3343 return (TRUE);
3344
3345 /*
3346 * If the region is not wholly contained within a
3347 * single entry, walk the entries looking for holes.
3348 */
3349 prev = cur->vme_end;
3350 cur = cur->vme_next;
3351 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
3352 if (end <= cur->vme_end)
3353 return (TRUE);
3354 prev = cur->vme_end;
3355 cur = cur->vme_next;
3356 }
3357 return (FALSE);
3358 }
3359
3360 /*
3361 * vm_map_submap: [ kernel use only ]
3362 *
3363 * Mark the given range as handled by a subordinate map.
3364 *
3365 * This range must have been created with vm_map_find using
3366 * the vm_submap_object, and no other operations may have been
3367 * performed on this range prior to calling vm_map_submap.
3368 *
3369 * Only a limited number of operations can be performed
3370 * within this range after calling vm_map_submap:
3371 * vm_fault
3372 * [Don't try vm_map_copyin!]
3373 *
3374 * To remove a submapping, one must first remove the
3375 * range from the superior map, and then destroy the
3376 * submap (if desired). [Better yet, don't try it.]
3377 */
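/*
 * Rough sketch of that sequence (illustrative only; the reservation
 * call varies by caller, but the range must be backed by
 * vm_submap_object -- "parent_map", "submap", "base", "size" and
 * "use_pmap" are placeholders):
 *
 *	vm_map_offset_t	base = ...;
 *
 *	kr = vm_map_enter(parent_map, &base, size,
 *			  (vm_map_offset_t) 0, VM_FLAGS_FIXED,
 *			  vm_submap_object, 0, FALSE,
 *			  VM_PROT_READ, VM_PROT_ALL, VM_INHERIT_SHARE);
 *	kr = vm_map_submap(parent_map, base, base + size,
 *			   submap, 0, use_pmap);
 */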
3378 kern_return_t
3379 vm_map_submap(
3380 vm_map_t map,
3381 vm_map_offset_t start,
3382 vm_map_offset_t end,
3383 vm_map_t submap,
3384 vm_map_offset_t offset,
3385 #ifdef NO_NESTED_PMAP
3386 __unused
3387 #endif /* NO_NESTED_PMAP */
3388 boolean_t use_pmap)
3389 {
3390 vm_map_entry_t entry;
3391 register kern_return_t result = KERN_INVALID_ARGUMENT;
3392 register vm_object_t object;
3393
3394 vm_map_lock(map);
3395
3396 if (! vm_map_lookup_entry(map, start, &entry)) {
3397 entry = entry->vme_next;
3398 }
3399
3400 if (entry == vm_map_to_entry(map) ||
3401 entry->is_sub_map) {
3402 vm_map_unlock(map);
3403 return KERN_INVALID_ARGUMENT;
3404 }
3405
3406 assert(!entry->use_pmap); /* we don't want to unnest anything here */
3407 vm_map_clip_start(map, entry, start);
3408 vm_map_clip_end(map, entry, end);
3409
3410 if ((entry->vme_start == start) && (entry->vme_end == end) &&
3411 (!entry->is_sub_map) &&
3412 ((object = entry->object.vm_object) == vm_submap_object) &&
3413 (object->resident_page_count == 0) &&
3414 (object->copy == VM_OBJECT_NULL) &&
3415 (object->shadow == VM_OBJECT_NULL) &&
3416 (!object->pager_created)) {
3417 entry->offset = (vm_object_offset_t)offset;
3418 entry->object.vm_object = VM_OBJECT_NULL;
3419 vm_object_deallocate(object);
3420 entry->is_sub_map = TRUE;
3421 entry->object.sub_map = submap;
3422 vm_map_reference(submap);
3423 if (submap->mapped_in_other_pmaps == FALSE &&
3424 vm_map_pmap(submap) != PMAP_NULL &&
3425 vm_map_pmap(submap) != vm_map_pmap(map)) {
3426 /*
3427 * This submap is being mapped in a map
3428 * that uses a different pmap.
3429 * Set its "mapped_in_other_pmaps" flag
3430 * to indicate that we now need to
3431 * remove mappings from all pmaps rather
3432 * than just the submap's pmap.
3433 */
3434 submap->mapped_in_other_pmaps = TRUE;
3435 }
3436
3437 #ifndef NO_NESTED_PMAP
3438 if (use_pmap) {
3439 /* nest if platform code will allow */
3440 if(submap->pmap == NULL) {
3441 ledger_t ledger = map->pmap->ledger;
3442 submap->pmap = pmap_create(ledger,
3443 (vm_map_size_t) 0, FALSE);
3444 if(submap->pmap == PMAP_NULL) {
3445 vm_map_unlock(map);
3446 return(KERN_NO_SPACE);
3447 }
3448 }
3449 result = pmap_nest(map->pmap,
3450 (entry->object.sub_map)->pmap,
3451 (addr64_t)start,
3452 (addr64_t)start,
3453 (uint64_t)(end - start));
3454 if(result)
3455 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
3456 entry->use_pmap = TRUE;
3457 }
3458 #else /* NO_NESTED_PMAP */
3459 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
3460 #endif /* NO_NESTED_PMAP */
3461 result = KERN_SUCCESS;
3462 }
3463 vm_map_unlock(map);
3464
3465 return(result);
3466 }
3467
3468 /*
3469 * vm_map_protect:
3470 *
3471 * Sets the protection of the specified address
3472 * region in the target map. If "set_max" is
3473 * specified, the maximum protection is to be set;
3474 * otherwise, only the current protection is affected.
3475 */
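/*
 * A minimal sketch (illustrative only): make a range read-only for the
 * current mapping while leaving its maximum protection untouched:
 *
 *	kr = vm_map_protect(map, start, end, VM_PROT_READ, FALSE);
 *
 * Passing TRUE for "set_max" would clamp max_protection as well, which
 * the validation pass below only allows to be narrowed (unless
 * VM_PROT_COPY is requested).
 */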
3476 kern_return_t
3477 vm_map_protect(
3478 register vm_map_t map,
3479 register vm_map_offset_t start,
3480 register vm_map_offset_t end,
3481 register vm_prot_t new_prot,
3482 register boolean_t set_max)
3483 {
3484 register vm_map_entry_t current;
3485 register vm_map_offset_t prev;
3486 vm_map_entry_t entry;
3487 vm_prot_t new_max;
3488
3489 XPR(XPR_VM_MAP,
3490 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
3491 map, start, end, new_prot, set_max);
3492
3493 vm_map_lock(map);
3494
3495 /* LP64todo - remove this check when vm_map_commpage64()
3496 * no longer has to stuff in a map_entry for the commpage
3497 * above the map's max_offset.
3498 */
3499 if (start >= map->max_offset) {
3500 vm_map_unlock(map);
3501 return(KERN_INVALID_ADDRESS);
3502 }
3503
3504 while(1) {
3505 /*
3506 * Lookup the entry. If it doesn't start in a valid
3507 * entry, return an error.
3508 */
3509 if (! vm_map_lookup_entry(map, start, &entry)) {
3510 vm_map_unlock(map);
3511 return(KERN_INVALID_ADDRESS);
3512 }
3513
3514 if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
3515 start = SUPERPAGE_ROUND_DOWN(start);
3516 continue;
3517 }
3518 break;
3519 }
3520 if (entry->superpage_size)
3521 end = SUPERPAGE_ROUND_UP(end);
3522
3523 /*
3524 * Make a first pass to check for protection and address
3525 * violations.
3526 */
3527
3528 current = entry;
3529 prev = current->vme_start;
3530 while ((current != vm_map_to_entry(map)) &&
3531 (current->vme_start < end)) {
3532
3533 /*
3534 * If there is a hole, return an error.
3535 */
3536 if (current->vme_start != prev) {
3537 vm_map_unlock(map);
3538 return(KERN_INVALID_ADDRESS);
3539 }
3540
3541 new_max = current->max_protection;
3542 if(new_prot & VM_PROT_COPY) {
3543 new_max |= VM_PROT_WRITE;
3544 if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
3545 vm_map_unlock(map);
3546 return(KERN_PROTECTION_FAILURE);
3547 }
3548 } else {
3549 if ((new_prot & new_max) != new_prot) {
3550 vm_map_unlock(map);
3551 return(KERN_PROTECTION_FAILURE);
3552 }
3553 }
3554
3555 #if CONFIG_EMBEDDED
3556 if (new_prot & VM_PROT_WRITE) {
3557 if ((new_prot & VM_PROT_EXECUTE) && !(current->used_for_jit)) {
3558 printf("EMBEDDED: %s can't have both write and exec at the same time\n", __FUNCTION__);
3559 new_prot &= ~VM_PROT_EXECUTE;
3560 }
3561 }
3562 #endif
3563
3564 prev = current->vme_end;
3565 current = current->vme_next;
3566 }
3567 if (end > prev) {
3568 vm_map_unlock(map);
3569 return(KERN_INVALID_ADDRESS);
3570 }
3571
3572 /*
3573 * Go back and fix up protections.
3574 * Clip to start here if the range starts within
3575 * the entry.
3576 */
3577
3578 current = entry;
3579 if (current != vm_map_to_entry(map)) {
3580 /* clip and unnest if necessary */
3581 vm_map_clip_start(map, current, start);
3582 }
3583
3584 while ((current != vm_map_to_entry(map)) &&
3585 (current->vme_start < end)) {
3586
3587 vm_prot_t old_prot;
3588
3589 vm_map_clip_end(map, current, end);
3590
3591 assert(!current->use_pmap); /* clipping did unnest if needed */
3592
3593 old_prot = current->protection;
3594
3595 if(new_prot & VM_PROT_COPY) {
3596 /* caller is asking specifically to copy the */
3597 /* mapped data; this implies that max protection */
3598 /* will include write. Caller must be prepared */
3599 /* for loss of shared memory communication in the */
3600 /* target area after taking this step */
3601
3602 if (current->is_sub_map == FALSE && current->object.vm_object == VM_OBJECT_NULL){
3603 current->object.vm_object = vm_object_allocate((vm_map_size_t)(current->vme_end - current->vme_start));
3604 current->offset = 0;
3605 }
3606 current->needs_copy = TRUE;
3607 current->max_protection |= VM_PROT_WRITE;
3608 }
3609
3610 if (set_max)
3611 current->protection =
3612 (current->max_protection =
3613 new_prot & ~VM_PROT_COPY) &
3614 old_prot;
3615 else
3616 current->protection = new_prot & ~VM_PROT_COPY;
3617
3618 /*
3619 * Update physical map if necessary.
3620 * If the request is to turn off write protection,
3621 * we won't do it for real (in pmap). This is because
3622 * it would cause copy-on-write to fail. We've already
3623 * set the new protection in the map, so if a
3624 * write-protect fault occurred, it will be fixed up
3625 * properly, COW or not.
3626 */
3627 if (current->protection != old_prot) {
3628 /* Look one level in: we support nested pmaps */
3629 /* from mapped submaps, which are direct entries */
3630 /* in our map. */
3631
3632 vm_prot_t prot;
3633
3634 prot = current->protection & ~VM_PROT_WRITE;
3635
3636 if (override_nx(map, current->alias) && prot)
3637 prot |= VM_PROT_EXECUTE;
3638
3639 if (current->is_sub_map && current->use_pmap) {
3640 pmap_protect(current->object.sub_map->pmap,
3641 current->vme_start,
3642 current->vme_end,
3643 prot);
3644 } else {
3645 pmap_protect(map->pmap,
3646 current->vme_start,
3647 current->vme_end,
3648 prot);
3649 }
3650 }
3651 current = current->vme_next;
3652 }
3653
3654 current = entry;
3655 while ((current != vm_map_to_entry(map)) &&
3656 (current->vme_start <= end)) {
3657 vm_map_simplify_entry(map, current);
3658 current = current->vme_next;
3659 }
3660
3661 vm_map_unlock(map);
3662 return(KERN_SUCCESS);
3663 }
3664
3665 /*
3666 * vm_map_inherit:
3667 *
3668 * Sets the inheritance of the specified address
3669 * range in the target map. Inheritance
3670 * affects how the map will be shared with
3671 * child maps at the time of vm_map_fork.
3672 */
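/*
 * For example (illustrative only), marking a range so that a child
 * created at vm_map_fork() time receives its own copy rather than
 * sharing it with the parent:
 *
 *	kr = vm_map_inherit(map, start, end, VM_INHERIT_COPY);
 *
 * Note that VM_INHERIT_COPY is rejected below for ranges backed by
 * submaps.
 */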
3673 kern_return_t
3674 vm_map_inherit(
3675 register vm_map_t map,
3676 register vm_map_offset_t start,
3677 register vm_map_offset_t end,
3678 register vm_inherit_t new_inheritance)
3679 {
3680 register vm_map_entry_t entry;
3681 vm_map_entry_t temp_entry;
3682
3683 vm_map_lock(map);
3684
3685 VM_MAP_RANGE_CHECK(map, start, end);
3686
3687 if (vm_map_lookup_entry(map, start, &temp_entry)) {
3688 entry = temp_entry;
3689 }
3690 else {
3691 temp_entry = temp_entry->vme_next;
3692 entry = temp_entry;
3693 }
3694
3695 /* first check entire range for submaps which can't support the */
3696 /* given inheritance. */
3697 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3698 if(entry->is_sub_map) {
3699 if(new_inheritance == VM_INHERIT_COPY) {
3700 vm_map_unlock(map);
3701 return(KERN_INVALID_ARGUMENT);
3702 }
3703 }
3704
3705 entry = entry->vme_next;
3706 }
3707
3708 entry = temp_entry;
3709 if (entry != vm_map_to_entry(map)) {
3710 /* clip and unnest if necessary */
3711 vm_map_clip_start(map, entry, start);
3712 }
3713
3714 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
3715 vm_map_clip_end(map, entry, end);
3716 assert(!entry->use_pmap); /* clip did unnest if needed */
3717
3718 entry->inheritance = new_inheritance;
3719
3720 entry = entry->vme_next;
3721 }
3722
3723 vm_map_unlock(map);
3724 return(KERN_SUCCESS);
3725 }
3726
3727 /*
3728 * Update the accounting for the amount of wired memory in this map. If the user has
3729 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
3730 */
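/*
 * In shorthand (illustrative), the user-wire admission check performed
 * below is:
 *
 *	allowed = MIN(map->user_wire_limit, vm_user_wire_limit);
 *	if (size + map->user_wire_size > allowed)
 *		return KERN_RESOURCE_SHORTAGE;
 *
 * plus two system-wide checks against vm_global_user_wire_limit and
 * against max_mem - vm_global_no_user_wire_amount.
 */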
3731
3732 static kern_return_t
3733 add_wire_counts(
3734 vm_map_t map,
3735 vm_map_entry_t entry,
3736 boolean_t user_wire)
3737 {
3738 vm_map_size_t size;
3739
3740 if (user_wire) {
3741 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
3742
3743 /*
3744 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
3745 * this map entry.
3746 */
3747
3748 if (entry->user_wired_count == 0) {
3749 size = entry->vme_end - entry->vme_start;
3750
3751 /*
3752 * Since this is the first time the user is wiring this map entry, check to see if we're
3753 * exceeding the user wire limits. There is a per map limit which is the smaller of either
3754 * the process's rlimit or the global vm_user_wire_limit which caps this value. There is also
3755 * a system-wide limit on the amount of memory all users can wire. If the user is over either
3756 * limit, then we fail.
3757 */
3758
3759 if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
3760 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
3761 size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
3762 return KERN_RESOURCE_SHORTAGE;
3763
3764 /*
3765 * The first time the user wires an entry, we also increment the wired_count and add this to
3766 * the total that has been wired in the map.
3767 */
3768
3769 if (entry->wired_count >= MAX_WIRE_COUNT)
3770 return KERN_FAILURE;
3771
3772 entry->wired_count++;
3773 map->user_wire_size += size;
3774 }
3775
3776 if (entry->user_wired_count >= MAX_WIRE_COUNT)
3777 return KERN_FAILURE;
3778
3779 entry->user_wired_count++;
3780
3781 } else {
3782
3783 /*
3784 * The kernel's wiring the memory. Just bump the count and continue.
3785 */
3786
3787 if (entry->wired_count >= MAX_WIRE_COUNT)
3788 panic("vm_map_wire: too many wirings");
3789
3790 entry->wired_count++;
3791 }
3792
3793 return KERN_SUCCESS;
3794 }
3795
3796 /*
3797 * Update the memory wiring accounting now that the given map entry is being unwired.
3798 */
3799
3800 static void
3801 subtract_wire_counts(
3802 vm_map_t map,
3803 vm_map_entry_t entry,
3804 boolean_t user_wire)
3805 {
3806
3807 if (user_wire) {
3808
3809 /*
3810 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
3811 */
3812
3813 if (entry->user_wired_count == 1) {
3814
3815 /*
3816 * We're removing the last user wire reference. Decrement the wired_count and the total
3817 * user wired memory for this map.
3818 */
3819
3820 assert(entry->wired_count >= 1);
3821 entry->wired_count--;
3822 map->user_wire_size -= entry->vme_end - entry->vme_start;
3823 }
3824
3825 assert(entry->user_wired_count >= 1);
3826 entry->user_wired_count--;
3827
3828 } else {
3829
3830 /*
3831 * The kernel is unwiring the memory. Just update the count.
3832 */
3833
3834 assert(entry->wired_count >= 1);
3835 entry->wired_count--;
3836 }
3837 }
3838
3839 /*
3840 * vm_map_wire:
3841 *
3842 * Sets the pageability of the specified address range in the
3843 * target map as wired. Regions specified as not pageable require
3844 * locked-down physical memory and physical page maps. The
3845 * access_type variable indicates types of accesses that must not
3846 * generate page faults. This is checked against protection of
3847 * memory being locked-down.
3848 *
3849 * The map must not be locked, but a reference must remain to the
3850 * map throughout the call.
3851 */
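/*
 * Typical use (illustrative only): wire a user range for read/write
 * access on behalf of the task itself, so the per-map and global user
 * wire limits in add_wire_counts() apply:
 *
 *	kr = vm_map_wire(map, start, end,
 *			 VM_PROT_READ | VM_PROT_WRITE,
 *			 TRUE);		(user_wire)
 */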
3852 static kern_return_t
3853 vm_map_wire_nested(
3854 register vm_map_t map,
3855 register vm_map_offset_t start,
3856 register vm_map_offset_t end,
3857 register vm_prot_t access_type,
3858 boolean_t user_wire,
3859 pmap_t map_pmap,
3860 vm_map_offset_t pmap_addr)
3861 {
3862 register vm_map_entry_t entry;
3863 struct vm_map_entry *first_entry, tmp_entry;
3864 vm_map_t real_map;
3865 register vm_map_offset_t s,e;
3866 kern_return_t rc;
3867 boolean_t need_wakeup;
3868 boolean_t main_map = FALSE;
3869 wait_interrupt_t interruptible_state;
3870 thread_t cur_thread;
3871 unsigned int last_timestamp;
3872 vm_map_size_t size;
3873
3874 vm_map_lock(map);
3875 if(map_pmap == NULL)
3876 main_map = TRUE;
3877 last_timestamp = map->timestamp;
3878
3879 VM_MAP_RANGE_CHECK(map, start, end);
3880 assert(page_aligned(start));
3881 assert(page_aligned(end));
3882 if (start == end) {
3883 /* We wired what the caller asked for: zero pages */
3884 vm_map_unlock(map);
3885 return KERN_SUCCESS;
3886 }
3887
3888 need_wakeup = FALSE;
3889 cur_thread = current_thread();
3890
3891 s = start;
3892 rc = KERN_SUCCESS;
3893
3894 if (vm_map_lookup_entry(map, s, &first_entry)) {
3895 entry = first_entry;
3896 /*
3897 * vm_map_clip_start will be done later.
3898 * We don't want to unnest any nested submaps here !
3899 */
3900 } else {
3901 /* Start address is not in map */
3902 rc = KERN_INVALID_ADDRESS;
3903 goto done;
3904 }
3905
3906 while ((entry != vm_map_to_entry(map)) && (s < end)) {
3907 /*
3908 * At this point, we have wired from "start" to "s".
3909 * We still need to wire from "s" to "end".
3910 *
3911 * "entry" hasn't been clipped, so it could start before "s"
3912 * and/or end after "end".
3913 */
3914
3915 /* "e" is how far we want to wire in this entry */
3916 e = entry->vme_end;
3917 if (e > end)
3918 e = end;
3919
3920 /*
3921 * If another thread is wiring/unwiring this entry then
3922 * block after informing other thread to wake us up.
3923 */
3924 if (entry->in_transition) {
3925 wait_result_t wait_result;
3926
3927 /*
3928 * We have not clipped the entry. Make sure that
3929 * the start address is in range so that the lookup
3930 * below will succeed.
3931 * "s" is the current starting point: we've already
3932 * wired from "start" to "s" and we still have
3933 * to wire from "s" to "end".
3934 */
3935
3936 entry->needs_wakeup = TRUE;
3937
3938 /*
3939 * wake up anybody waiting on entries that we have
3940 * already wired.
3941 */
3942 if (need_wakeup) {
3943 vm_map_entry_wakeup(map);
3944 need_wakeup = FALSE;
3945 }
3946 /*
3947 * User wiring is interruptible
3948 */
3949 wait_result = vm_map_entry_wait(map,
3950 (user_wire) ? THREAD_ABORTSAFE :
3951 THREAD_UNINT);
3952 if (user_wire && wait_result == THREAD_INTERRUPTED) {
3953 /*
3954 * undo the wirings we have done so far
3955 * We do not clear the needs_wakeup flag,
3956 * because we cannot tell if we were the
3957 * only one waiting.
3958 */
3959 rc = KERN_FAILURE;
3960 goto done;
3961 }
3962
3963 /*
3964 * Cannot avoid a lookup here. Reset the timestamp.
3965 */
3966 last_timestamp = map->timestamp;
3967
3968 /*
3969 * The entry could have been clipped, look it up again.
3970 * The worst that can happen is that it no longer exists.
3971 */
3972 if (!vm_map_lookup_entry(map, s, &first_entry)) {
3973 /*
3974 * User: undo everything up to the previous
3975 * entry. let vm_map_unwire worry about
3976 * checking the validity of the range.
3977 */
3978 rc = KERN_FAILURE;
3979 goto done;
3980 }
3981 entry = first_entry;
3982 continue;
3983 }
3984
3985 if (entry->is_sub_map) {
3986 vm_map_offset_t sub_start;
3987 vm_map_offset_t sub_end;
3988 vm_map_offset_t local_start;
3989 vm_map_offset_t local_end;
3990 pmap_t pmap;
3991
3992 vm_map_clip_start(map, entry, s);
3993 vm_map_clip_end(map, entry, end);
3994
3995 sub_start = entry->offset;
3996 sub_end = entry->vme_end;
3997 sub_end += entry->offset - entry->vme_start;
3998
3999 local_end = entry->vme_end;
4000 if(map_pmap == NULL) {
4001 vm_object_t object;
4002 vm_object_offset_t offset;
4003 vm_prot_t prot;
4004 boolean_t wired;
4005 vm_map_entry_t local_entry;
4006 vm_map_version_t version;
4007 vm_map_t lookup_map;
4008
4009 if(entry->use_pmap) {
4010 pmap = entry->object.sub_map->pmap;
4011 /* the ppc implementation requires that */
4012 /* a submap's pmap address ranges line */
4013 /* up with the parent map */
4014 #ifdef notdef
4015 pmap_addr = sub_start;
4016 #endif
4017 pmap_addr = s;
4018 } else {
4019 pmap = map->pmap;
4020 pmap_addr = s;
4021 }
4022
4023 if (entry->wired_count) {
4024 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4025 goto done;
4026
4027 /*
4028 * The map was not unlocked:
4029 * no need to goto re-lookup.
4030 * Just go directly to next entry.
4031 */
4032 entry = entry->vme_next;
4033 s = entry->vme_start;
4034 continue;
4035
4036 }
4037
4038 /* call vm_map_lookup_locked to */
4039 /* cause any needs copy to be */
4040 /* evaluated */
4041 local_start = entry->vme_start;
4042 lookup_map = map;
4043 vm_map_lock_write_to_read(map);
4044 if(vm_map_lookup_locked(
4045 &lookup_map, local_start,
4046 access_type,
4047 OBJECT_LOCK_EXCLUSIVE,
4048 &version, &object,
4049 &offset, &prot, &wired,
4050 NULL,
4051 &real_map)) {
4052
4053 vm_map_unlock_read(lookup_map);
4054 vm_map_unwire(map, start,
4055 s, user_wire);
4056 return(KERN_FAILURE);
4057 }
4058 vm_object_unlock(object);
4059 if(real_map != lookup_map)
4060 vm_map_unlock(real_map);
4061 vm_map_unlock_read(lookup_map);
4062 vm_map_lock(map);
4063
4064 /* we unlocked, so must re-lookup */
4065 if (!vm_map_lookup_entry(map,
4066 local_start,
4067 &local_entry)) {
4068 rc = KERN_FAILURE;
4069 goto done;
4070 }
4071
4072 /*
4073 * entry could have been "simplified",
4074 * so re-clip
4075 */
4076 entry = local_entry;
4077 assert(s == local_start);
4078 vm_map_clip_start(map, entry, s);
4079 vm_map_clip_end(map, entry, end);
4080 /* re-compute "e" */
4081 e = entry->vme_end;
4082 if (e > end)
4083 e = end;
4084
4085 /* did we have a change of type? */
4086 if (!entry->is_sub_map) {
4087 last_timestamp = map->timestamp;
4088 continue;
4089 }
4090 } else {
4091 local_start = entry->vme_start;
4092 pmap = map_pmap;
4093 }
4094
4095 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4096 goto done;
4097
4098 entry->in_transition = TRUE;
4099
4100 vm_map_unlock(map);
4101 rc = vm_map_wire_nested(entry->object.sub_map,
4102 sub_start, sub_end,
4103 access_type,
4104 user_wire, pmap, pmap_addr);
4105 vm_map_lock(map);
4106
4107 /*
4108 * Find the entry again. It could have been clipped
4109 * after we unlocked the map.
4110 */
4111 if (!vm_map_lookup_entry(map, local_start,
4112 &first_entry))
4113 panic("vm_map_wire: re-lookup failed");
4114 entry = first_entry;
4115
4116 assert(local_start == s);
4117 /* re-compute "e" */
4118 e = entry->vme_end;
4119 if (e > end)
4120 e = end;
4121
4122 last_timestamp = map->timestamp;
4123 while ((entry != vm_map_to_entry(map)) &&
4124 (entry->vme_start < e)) {
4125 assert(entry->in_transition);
4126 entry->in_transition = FALSE;
4127 if (entry->needs_wakeup) {
4128 entry->needs_wakeup = FALSE;
4129 need_wakeup = TRUE;
4130 }
4131 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
4132 subtract_wire_counts(map, entry, user_wire);
4133 }
4134 entry = entry->vme_next;
4135 }
4136 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
4137 goto done;
4138 }
4139
4140 /* no need to relookup again */
4141 s = entry->vme_start;
4142 continue;
4143 }
4144
4145 /*
4146 * If this entry is already wired then increment
4147 * the appropriate wire reference count.
4148 */
4149 if (entry->wired_count) {
4150 /*
4151 * entry is already wired down, get our reference
4152 * after clipping to our range.
4153 */
4154 vm_map_clip_start(map, entry, s);
4155 vm_map_clip_end(map, entry, end);
4156
4157 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4158 goto done;
4159
4160 /* map was not unlocked: no need to relookup */
4161 entry = entry->vme_next;
4162 s = entry->vme_start;
4163 continue;
4164 }
4165
4166 /*
4167 * Unwired entry or wire request transmitted via submap
4168 */
4169
4170
4171 /*
4172 * Perform actions of vm_map_lookup that need the write
4173 * lock on the map: create a shadow object for a
4174 * copy-on-write region, or an object for a zero-fill
4175 * region.
4176 */
4177 size = entry->vme_end - entry->vme_start;
4178 /*
4179 * If wiring a copy-on-write page, we need to copy it now
4180 * even if we're only (currently) requesting read access.
4181 * This is aggressive, but once it's wired we can't move it.
4182 */
4183 if (entry->needs_copy) {
4184 vm_object_shadow(&entry->object.vm_object,
4185 &entry->offset, size);
4186 entry->needs_copy = FALSE;
4187 } else if (entry->object.vm_object == VM_OBJECT_NULL) {
4188 entry->object.vm_object = vm_object_allocate(size);
4189 entry->offset = (vm_object_offset_t)0;
4190 }
4191
4192 vm_map_clip_start(map, entry, s);
4193 vm_map_clip_end(map, entry, end);
4194
4195 /* re-compute "e" */
4196 e = entry->vme_end;
4197 if (e > end)
4198 e = end;
4199
4200 /*
4201 * Check for holes and protection mismatch.
4202 * Holes: Next entry should be contiguous unless this
4203 * is the end of the region.
4204 * Protection: Access requested must be allowed, unless
4205 * wiring is by protection class
4206 */
4207 if ((entry->vme_end < end) &&
4208 ((entry->vme_next == vm_map_to_entry(map)) ||
4209 (entry->vme_next->vme_start > entry->vme_end))) {
4210 /* found a hole */
4211 rc = KERN_INVALID_ADDRESS;
4212 goto done;
4213 }
4214 if ((entry->protection & access_type) != access_type) {
4215 /* found a protection problem */
4216 rc = KERN_PROTECTION_FAILURE;
4217 goto done;
4218 }
4219
4220 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
4221
4222 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4223 goto done;
4224
4225 entry->in_transition = TRUE;
4226
4227 /*
4228 * This entry might get split once we unlock the map.
4229 * In vm_fault_wire(), we need the current range as
4230 * defined by this entry. In order for this to work
4231 * along with a simultaneous clip operation, we make a
4232 * temporary copy of this entry and use that for the
4233 * wiring. Note that the underlying objects do not
4234 * change during a clip.
4235 */
4236 tmp_entry = *entry;
4237
4238 /*
4239 * The in_transition state guarantees that the entry
4240 * (or entries for this range, if a split occurred) will be
4241 * there when the map lock is acquired for the second time.
4242 */
4243 vm_map_unlock(map);
4244
4245 if (!user_wire && cur_thread != THREAD_NULL)
4246 interruptible_state = thread_interrupt_level(THREAD_UNINT);
4247 else
4248 interruptible_state = THREAD_UNINT;
4249
4250 if(map_pmap)
4251 rc = vm_fault_wire(map,
4252 &tmp_entry, map_pmap, pmap_addr);
4253 else
4254 rc = vm_fault_wire(map,
4255 &tmp_entry, map->pmap,
4256 tmp_entry.vme_start);
4257
4258 if (!user_wire && cur_thread != THREAD_NULL)
4259 thread_interrupt_level(interruptible_state);
4260
4261 vm_map_lock(map);
4262
4263 if (last_timestamp+1 != map->timestamp) {
4264 /*
4265 * Find the entry again. It could have been clipped
4266 * after we unlocked the map.
4267 */
4268 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4269 &first_entry))
4270 panic("vm_map_wire: re-lookup failed");
4271
4272 entry = first_entry;
4273 }
4274
4275 last_timestamp = map->timestamp;
4276
4277 while ((entry != vm_map_to_entry(map)) &&
4278 (entry->vme_start < tmp_entry.vme_end)) {
4279 assert(entry->in_transition);
4280 entry->in_transition = FALSE;
4281 if (entry->needs_wakeup) {
4282 entry->needs_wakeup = FALSE;
4283 need_wakeup = TRUE;
4284 }
4285 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
4286 subtract_wire_counts(map, entry, user_wire);
4287 }
4288 entry = entry->vme_next;
4289 }
4290
4291 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
4292 goto done;
4293 }
4294
4295 s = entry->vme_start;
4296 } /* end while loop through map entries */
4297
4298 done:
4299 if (rc == KERN_SUCCESS) {
4300 /* repair any damage we may have made to the VM map */
4301 vm_map_simplify_range(map, start, end);
4302 }
4303
4304 vm_map_unlock(map);
4305
4306 /*
4307 * wake up anybody waiting on entries we wired.
4308 */
4309 if (need_wakeup)
4310 vm_map_entry_wakeup(map);
4311
4312 if (rc != KERN_SUCCESS) {
4313 /* undo what has been wired so far */
4314 vm_map_unwire(map, start, s, user_wire);
4315 }
4316
4317 return rc;
4318
4319 }
4320
4321 kern_return_t
4322 vm_map_wire(
4323 register vm_map_t map,
4324 register vm_map_offset_t start,
4325 register vm_map_offset_t end,
4326 register vm_prot_t access_type,
4327 boolean_t user_wire)
4328 {
4329
4330 kern_return_t kret;
4331
4332 kret = vm_map_wire_nested(map, start, end, access_type,
4333 user_wire, (pmap_t)NULL, 0);
4334 return kret;
4335 }
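/*
 * Editorial sketch (not part of the original source): how a
 * hypothetical kernel client might wire a page-aligned range so it
 * can be touched without faulting, and unwire it afterwards.  "addr"
 * and "size" are placeholders; the FALSE argument selects the kernel
 * (non-user) wiring path.
 *
 *	kern_return_t kr;
 *
 *	kr = vm_map_wire(kernel_map, addr, addr + size,
 *			 VM_PROT_READ | VM_PROT_WRITE, FALSE);
 *	if (kr == KERN_SUCCESS) {
 *		... access the memory, no page faults will be taken ...
 *		(void) vm_map_unwire(kernel_map, addr, addr + size, FALSE);
 *	}
 */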
4336
4337 /*
4338 * vm_map_unwire:
4339 *
4340 * Sets the pageability of the specified address range in the target
4341 * map as pageable. Regions specified must have been wired previously.
4342 *
4343 * The map must not be locked, but a reference must remain to the map
4344 * throughout the call.
4345 *
4346 * The kernel will panic on failures. User unwire ignores holes and
4347 * unwired or in-transition entries, so that a partial failure does not
4348 * leave memory wired and effectively lost.
4349 */
4350 static kern_return_t
4351 vm_map_unwire_nested(
4352 register vm_map_t map,
4353 register vm_map_offset_t start,
4354 register vm_map_offset_t end,
4355 boolean_t user_wire,
4356 pmap_t map_pmap,
4357 vm_map_offset_t pmap_addr)
4358 {
4359 register vm_map_entry_t entry;
4360 struct vm_map_entry *first_entry, tmp_entry;
4361 boolean_t need_wakeup;
4362 boolean_t main_map = FALSE;
4363 unsigned int last_timestamp;
4364
4365 vm_map_lock(map);
4366 if(map_pmap == NULL)
4367 main_map = TRUE;
4368 last_timestamp = map->timestamp;
4369
4370 VM_MAP_RANGE_CHECK(map, start, end);
4371 assert(page_aligned(start));
4372 assert(page_aligned(end));
4373
4374 if (start == end) {
4375 /* We unwired what the caller asked for: zero pages */
4376 vm_map_unlock(map);
4377 return KERN_SUCCESS;
4378 }
4379
4380 if (vm_map_lookup_entry(map, start, &first_entry)) {
4381 entry = first_entry;
4382 /*
4383 * vm_map_clip_start will be done later.
4384 * We don't want to unnest any nested submaps here!
4385 */
4386 }
4387 else {
4388 if (!user_wire) {
4389 panic("vm_map_unwire: start not found");
4390 }
4391 /* Start address is not in map. */
4392 vm_map_unlock(map);
4393 return(KERN_INVALID_ADDRESS);
4394 }
4395
4396 if (entry->superpage_size) {
4397 /* superpages are always wired */
4398 vm_map_unlock(map);
4399 return KERN_INVALID_ADDRESS;
4400 }
4401
4402 need_wakeup = FALSE;
4403 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4404 if (entry->in_transition) {
4405 /*
4406 * 1)
4407 * Another thread is wiring down this entry. Were it
4408 * not for that other thread, we would be unwiring an
4409 * unwired entry, which is not permitted. If we wait,
4410 * we will end up unwiring memory that we did not
4411 * wire.
4412 *
4413 * 2)
4414 * Another thread is unwiring this entry. We did not
4415 * have a reference to it, because if we did, this
4416 * entry will not be getting unwired now.
4417 */
4418 if (!user_wire) {
4419 /*
4420 * XXX FBDP
4421 * This could happen: there could be some
4422 * overlapping vslock/vsunlock operations
4423 * going on.
4424 * We should probably just wait and retry,
4425 * but then we have to be careful that this
4426 * entry could get "simplified" after
4427 * "in_transition" gets unset and before
4428 * we re-lookup the entry, so we would
4429 * have to re-clip the entry to avoid
4430 * re-unwiring what we have already unwired...
4431 * See vm_map_wire_nested().
4432 *
4433 * Or we could just ignore "in_transition"
4434 * here and proceed to decrement the wired
4435 * count(s) on this entry. That should be fine
4436 * as long as "wired_count" doesn't drop all
4437 * the way to 0 (and we should panic if THAT
4438 * happens).
4439 */
4440 panic("vm_map_unwire: in_transition entry");
4441 }
4442
4443 entry = entry->vme_next;
4444 continue;
4445 }
4446
4447 if (entry->is_sub_map) {
4448 vm_map_offset_t sub_start;
4449 vm_map_offset_t sub_end;
4450 vm_map_offset_t local_end;
4451 pmap_t pmap;
4452
4453 vm_map_clip_start(map, entry, start);
4454 vm_map_clip_end(map, entry, end);
4455
4456 sub_start = entry->offset;
4457 sub_end = entry->vme_end - entry->vme_start;
4458 sub_end += entry->offset;
4459 local_end = entry->vme_end;
4460 if(map_pmap == NULL) {
4461 if(entry->use_pmap) {
4462 pmap = entry->object.sub_map->pmap;
4463 pmap_addr = sub_start;
4464 } else {
4465 pmap = map->pmap;
4466 pmap_addr = start;
4467 }
4468 if (entry->wired_count == 0 ||
4469 (user_wire && entry->user_wired_count == 0)) {
4470 if (!user_wire)
4471 panic("vm_map_unwire: entry is unwired");
4472 entry = entry->vme_next;
4473 continue;
4474 }
4475
4476 /*
4477 * Check for holes
4478 * Holes: Next entry should be contiguous unless
4479 * this is the end of the region.
4480 */
4481 if (((entry->vme_end < end) &&
4482 ((entry->vme_next == vm_map_to_entry(map)) ||
4483 (entry->vme_next->vme_start
4484 > entry->vme_end)))) {
4485 if (!user_wire)
4486 panic("vm_map_unwire: non-contiguous region");
4487 /*
4488 entry = entry->vme_next;
4489 continue;
4490 */
4491 }
4492
4493 subtract_wire_counts(map, entry, user_wire);
4494
4495 if (entry->wired_count != 0) {
4496 entry = entry->vme_next;
4497 continue;
4498 }
4499
4500 entry->in_transition = TRUE;
4501 tmp_entry = *entry;/* see comment in vm_map_wire() */
4502
4503 /*
4504 * We can unlock the map now. The in_transition state
4505 * guarantees the existence of the entry.
4506 */
4507 vm_map_unlock(map);
4508 vm_map_unwire_nested(entry->object.sub_map,
4509 sub_start, sub_end, user_wire, pmap, pmap_addr);
4510 vm_map_lock(map);
4511
4512 if (last_timestamp+1 != map->timestamp) {
4513 /*
4514 * Find the entry again. It could have been
4515 * clipped or deleted after we unlocked the map.
4516 */
4517 if (!vm_map_lookup_entry(map,
4518 tmp_entry.vme_start,
4519 &first_entry)) {
4520 if (!user_wire)
4521 panic("vm_map_unwire: re-lookup failed");
4522 entry = first_entry->vme_next;
4523 } else
4524 entry = first_entry;
4525 }
4526 last_timestamp = map->timestamp;
4527
4528 /*
4529 * clear transition bit for all constituent entries
4530 * that were in the original entry (saved in
4531 * tmp_entry). Also check for waiters.
4532 */
4533 while ((entry != vm_map_to_entry(map)) &&
4534 (entry->vme_start < tmp_entry.vme_end)) {
4535 assert(entry->in_transition);
4536 entry->in_transition = FALSE;
4537 if (entry->needs_wakeup) {
4538 entry->needs_wakeup = FALSE;
4539 need_wakeup = TRUE;
4540 }
4541 entry = entry->vme_next;
4542 }
4543 continue;
4544 } else {
4545 vm_map_unlock(map);
4546 vm_map_unwire_nested(entry->object.sub_map,
4547 sub_start, sub_end, user_wire, map_pmap,
4548 pmap_addr);
4549 vm_map_lock(map);
4550
4551 if (last_timestamp+1 != map->timestamp) {
4552 /*
4553 * Find the entry again. It could have been
4554 * clipped or deleted after we unlocked the map.
4555 */
4556 if (!vm_map_lookup_entry(map,
4557 tmp_entry.vme_start,
4558 &first_entry)) {
4559 if (!user_wire)
4560 panic("vm_map_unwire: re-lookup failed");
4561 entry = first_entry->vme_next;
4562 } else
4563 entry = first_entry;
4564 }
4565 last_timestamp = map->timestamp;
4566 }
4567 }
4568
4569
4570 if ((entry->wired_count == 0) ||
4571 (user_wire && entry->user_wired_count == 0)) {
4572 if (!user_wire)
4573 panic("vm_map_unwire: entry is unwired");
4574
4575 entry = entry->vme_next;
4576 continue;
4577 }
4578
4579 assert(entry->wired_count > 0 &&
4580 (!user_wire || entry->user_wired_count > 0));
4581
4582 vm_map_clip_start(map, entry, start);
4583 vm_map_clip_end(map, entry, end);
4584
4585 /*
4586 * Check for holes
4587 * Holes: Next entry should be contiguous unless
4588 * this is the end of the region.
4589 */
4590 if (((entry->vme_end < end) &&
4591 ((entry->vme_next == vm_map_to_entry(map)) ||
4592 (entry->vme_next->vme_start > entry->vme_end)))) {
4593
4594 if (!user_wire)
4595 panic("vm_map_unwire: non-contiguous region");
4596 entry = entry->vme_next;
4597 continue;
4598 }
4599
4600 subtract_wire_counts(map, entry, user_wire);
4601
4602 if (entry->wired_count != 0) {
4603 entry = entry->vme_next;
4604 continue;
4605 }
4606
4607 if(entry->zero_wired_pages) {
4608 entry->zero_wired_pages = FALSE;
4609 }
4610
4611 entry->in_transition = TRUE;
4612 tmp_entry = *entry; /* see comment in vm_map_wire() */
4613
4614 /*
4615 * We can unlock the map now. The in_transition state
4616 * guarantees the existence of the entry.
4617 */
4618 vm_map_unlock(map);
4619 if(map_pmap) {
4620 vm_fault_unwire(map,
4621 &tmp_entry, FALSE, map_pmap, pmap_addr);
4622 } else {
4623 vm_fault_unwire(map,
4624 &tmp_entry, FALSE, map->pmap,
4625 tmp_entry.vme_start);
4626 }
4627 vm_map_lock(map);
4628
4629 if (last_timestamp+1 != map->timestamp) {
4630 /*
4631 * Find the entry again. It could have been clipped
4632 * or deleted after we unlocked the map.
4633 */
4634 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4635 &first_entry)) {
4636 if (!user_wire)
4637 panic("vm_map_unwire: re-lookup failed");
4638 entry = first_entry->vme_next;
4639 } else
4640 entry = first_entry;
4641 }
4642 last_timestamp = map->timestamp;
4643
4644 /*
4645 * clear transition bit for all constituent entries that
4646 * were in the original entry (saved in tmp_entry). Also
4647 * check for waiters.
4648 */
4649 while ((entry != vm_map_to_entry(map)) &&
4650 (entry->vme_start < tmp_entry.vme_end)) {
4651 assert(entry->in_transition);
4652 entry->in_transition = FALSE;
4653 if (entry->needs_wakeup) {
4654 entry->needs_wakeup = FALSE;
4655 need_wakeup = TRUE;
4656 }
4657 entry = entry->vme_next;
4658 }
4659 }
4660
4661 /*
4662 * We might have fragmented the address space when we wired this
4663 * range of addresses. Attempt to re-coalesce these VM map entries
4664 * with their neighbors now that they're no longer wired.
4665 * Under some circumstances, address space fragmentation can
4666 * prevent VM object shadow chain collapsing, which can cause
4667 * swap space leaks.
4668 */
4669 vm_map_simplify_range(map, start, end);
4670
4671 vm_map_unlock(map);
4672 /*
4673 * wake up anybody waiting on entries that we have unwired.
4674 */
4675 if (need_wakeup)
4676 vm_map_entry_wakeup(map);
4677 return(KERN_SUCCESS);
4678
4679 }
4680
4681 kern_return_t
4682 vm_map_unwire(
4683 register vm_map_t map,
4684 register vm_map_offset_t start,
4685 register vm_map_offset_t end,
4686 boolean_t user_wire)
4687 {
4688 return vm_map_unwire_nested(map, start, end,
4689 user_wire, (pmap_t)NULL, 0);
4690 }
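/*
 * Editorial sketch (not part of the original source): the user_wire
 * flag chooses between the two behaviours documented above.  A
 * hypothetical user-level request (the path used for mlock()-style
 * wiring) is interruptible and is rolled back if interrupted:
 *
 *	kr = vm_map_wire(current_map(), start, end, VM_PROT_READ, TRUE);
 *	...
 *	kr = vm_map_unwire(current_map(), start, end, TRUE);
 *
 * whereas the kernel path (user_wire == FALSE) never touches
 * user_wired_count and panics on inconsistencies rather than
 * tolerating them.
 */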
4691
4692
4693 /*
4694 * vm_map_entry_delete: [ internal use only ]
4695 *
4696 * Deallocate the given entry from the target map.
4697 */
4698 static void
4699 vm_map_entry_delete(
4700 register vm_map_t map,
4701 register vm_map_entry_t entry)
4702 {
4703 register vm_map_offset_t s, e;
4704 register vm_object_t object;
4705 register vm_map_t submap;
4706
4707 s = entry->vme_start;
4708 e = entry->vme_end;
4709 assert(page_aligned(s));
4710 assert(page_aligned(e));
4711 assert(entry->wired_count == 0);
4712 assert(entry->user_wired_count == 0);
4713 assert(!entry->permanent);
4714
4715 if (entry->is_sub_map) {
4716 object = NULL;
4717 submap = entry->object.sub_map;
4718 } else {
4719 submap = NULL;
4720 object = entry->object.vm_object;
4721 }
4722
4723 vm_map_store_entry_unlink(map, entry);
4724 map->size -= e - s;
4725
4726 vm_map_entry_dispose(map, entry);
4727
4728 vm_map_unlock(map);
4729 /*
4730 * Deallocate the object only after removing all
4731 * pmap entries pointing to its pages.
4732 */
4733 if (submap)
4734 vm_map_deallocate(submap);
4735 else
4736 vm_object_deallocate(object);
4737
4738 }
4739
4740 void
4741 vm_map_submap_pmap_clean(
4742 vm_map_t map,
4743 vm_map_offset_t start,
4744 vm_map_offset_t end,
4745 vm_map_t sub_map,
4746 vm_map_offset_t offset)
4747 {
4748 vm_map_offset_t submap_start;
4749 vm_map_offset_t submap_end;
4750 vm_map_size_t remove_size;
4751 vm_map_entry_t entry;
4752
4753 submap_end = offset + (end - start);
4754 submap_start = offset;
4755
4756 vm_map_lock_read(sub_map);
4757 if(vm_map_lookup_entry(sub_map, offset, &entry)) {
4758
4759 remove_size = (entry->vme_end - entry->vme_start);
4760 if(offset > entry->vme_start)
4761 remove_size -= offset - entry->vme_start;
4762
4763
4764 if(submap_end < entry->vme_end) {
4765 remove_size -=
4766 entry->vme_end - submap_end;
4767 }
4768 if(entry->is_sub_map) {
4769 vm_map_submap_pmap_clean(
4770 sub_map,
4771 start,
4772 start + remove_size,
4773 entry->object.sub_map,
4774 entry->offset);
4775 } else {
4776
4777 if((map->mapped_in_other_pmaps) && (map->ref_count)
4778 && (entry->object.vm_object != NULL)) {
4779 vm_object_pmap_protect(
4780 entry->object.vm_object,
4781 entry->offset+(offset-entry->vme_start),
4782 remove_size,
4783 PMAP_NULL,
4784 entry->vme_start,
4785 VM_PROT_NONE);
4786 } else {
4787 pmap_remove(map->pmap,
4788 (addr64_t)start,
4789 (addr64_t)(start + remove_size));
4790 }
4791 }
4792 }
4793
4794 entry = entry->vme_next;
4795
4796 while((entry != vm_map_to_entry(sub_map))
4797 && (entry->vme_start < submap_end)) {
4798 remove_size = (entry->vme_end - entry->vme_start);
4799 if(submap_end < entry->vme_end) {
4800 remove_size -= entry->vme_end - submap_end;
4801 }
4802 if(entry->is_sub_map) {
4803 vm_map_submap_pmap_clean(
4804 sub_map,
4805 (start + entry->vme_start) - offset,
4806 ((start + entry->vme_start) - offset) + remove_size,
4807 entry->object.sub_map,
4808 entry->offset);
4809 } else {
4810 if((map->mapped_in_other_pmaps) && (map->ref_count)
4811 && (entry->object.vm_object != NULL)) {
4812 vm_object_pmap_protect(
4813 entry->object.vm_object,
4814 entry->offset,
4815 remove_size,
4816 PMAP_NULL,
4817 entry->vme_start,
4818 VM_PROT_NONE);
4819 } else {
4820 pmap_remove(map->pmap,
4821 (addr64_t)((start + entry->vme_start)
4822 - offset),
4823 (addr64_t)(((start + entry->vme_start)
4824 - offset) + remove_size));
4825 }
4826 }
4827 entry = entry->vme_next;
4828 }
4829 vm_map_unlock_read(sub_map);
4830 return;
4831 }
4832
4833 /*
4834 * vm_map_delete: [ internal use only ]
4835 *
4836 * Deallocates the given address range from the target map.
4837 * Removes all user wirings. Unwires one kernel wiring if
4838 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
4839 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
4840 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
4841 *
4842 * This routine is called with map locked and leaves map locked.
4843 */
4844 static kern_return_t
4845 vm_map_delete(
4846 vm_map_t map,
4847 vm_map_offset_t start,
4848 vm_map_offset_t end,
4849 int flags,
4850 vm_map_t zap_map)
4851 {
4852 vm_map_entry_t entry, next;
4853 struct vm_map_entry *first_entry, tmp_entry;
4854 register vm_map_offset_t s;
4855 register vm_object_t object;
4856 boolean_t need_wakeup;
4857 unsigned int last_timestamp = ~0; /* unlikely value */
4858 int interruptible;
4859
4860 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
4861 THREAD_ABORTSAFE : THREAD_UNINT;
4862
4863 /*
4864 * All our DMA I/O operations in IOKit are currently done by
4865 * wiring through the map entries of the task requesting the I/O.
4866 * Because of this, we must always wait for kernel wirings
4867 * to go away on the entries before deleting them.
4868 *
4869 * Any caller who wants to actually remove a kernel wiring
4870 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
4871 * properly remove one wiring instead of blasting through
4872 * them all.
4873 */
4874 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
4875
4876 while(1) {
4877 /*
4878 * Find the start of the region, and clip it
4879 */
4880 if (vm_map_lookup_entry(map, start, &first_entry)) {
4881 entry = first_entry;
4882 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */
4883 start = SUPERPAGE_ROUND_DOWN(start);
4884 continue;
4885 }
4886 if (start == entry->vme_start) {
4887 /*
4888 * No need to clip. We don't want to cause
4889 * any unnecessary unnesting in this case...
4890 */
4891 } else {
4892 vm_map_clip_start(map, entry, start);
4893 }
4894
4895 /*
4896 * Fix the lookup hint now, rather than each
4897 * time through the loop.
4898 */
4899 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
4900 } else {
4901 entry = first_entry->vme_next;
4902 }
4903 break;
4904 }
4905 if (entry->superpage_size)
4906 end = SUPERPAGE_ROUND_UP(end);
4907
4908 need_wakeup = FALSE;
4909 /*
4910 * Step through all entries in this region
4911 */
4912 s = entry->vme_start;
4913 while ((entry != vm_map_to_entry(map)) && (s < end)) {
4914 /*
4915 * At this point, we have deleted all the memory entries
4916 * between "start" and "s". We still need to delete
4917 * all memory entries between "s" and "end".
4918 * While we were blocked and the map was unlocked, some
4919 * new memory entries could have been re-allocated between
4920 * "start" and "s" and we don't want to mess with those.
4921 * Some of those entries could even have been re-assembled
4922 * with an entry after "s" (in vm_map_simplify_entry()), so
4923 * we may have to vm_map_clip_start() again.
4924 */
4925
4926 if (entry->vme_start >= s) {
4927 /*
4928 * This entry starts on or after "s"
4929 * so no need to clip its start.
4930 */
4931 } else {
4932 /*
4933 * This entry has been re-assembled by a
4934 * vm_map_simplify_entry(). We need to
4935 * re-clip its start.
4936 */
4937 vm_map_clip_start(map, entry, s);
4938 }
4939 if (entry->vme_end <= end) {
4940 /*
4941 * This entry is going away completely, so no need
4942 * to clip and possibly cause an unnecessary unnesting.
4943 */
4944 } else {
4945 vm_map_clip_end(map, entry, end);
4946 }
4947
4948 if (entry->permanent) {
4949 panic("attempt to remove permanent VM map entry "
4950 "%p [0x%llx:0x%llx]\n",
4951 entry, (uint64_t) s, (uint64_t) end);
4952 }
4953
4954
4955 if (entry->in_transition) {
4956 wait_result_t wait_result;
4957
4958 /*
4959 * Another thread is wiring/unwiring this entry.
4960 * Let the other thread know we are waiting.
4961 */
4962 assert(s == entry->vme_start);
4963 entry->needs_wakeup = TRUE;
4964
4965 /*
4966 * wake up anybody waiting on entries that we have
4967 * already unwired/deleted.
4968 */
4969 if (need_wakeup) {
4970 vm_map_entry_wakeup(map);
4971 need_wakeup = FALSE;
4972 }
4973
4974 wait_result = vm_map_entry_wait(map, interruptible);
4975
4976 if (interruptible &&
4977 wait_result == THREAD_INTERRUPTED) {
4978 /*
4979 * We do not clear the needs_wakeup flag,
4980 * since we cannot tell if we were the only one.
4981 */
4982 vm_map_unlock(map);
4983 return KERN_ABORTED;
4984 }
4985
4986 /*
4987 * The entry could have been clipped or it
4988 * may not exist anymore. Look it up again.
4989 */
4990 if (!vm_map_lookup_entry(map, s, &first_entry)) {
4991 assert((map != kernel_map) &&
4992 (!entry->is_sub_map));
4993 /*
4994 * User: use the next entry
4995 */
4996 entry = first_entry->vme_next;
4997 s = entry->vme_start;
4998 } else {
4999 entry = first_entry;
5000 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5001 }
5002 last_timestamp = map->timestamp;
5003 continue;
5004 } /* end in_transition */
5005
5006 if (entry->wired_count) {
5007 boolean_t user_wire;
5008
5009 user_wire = entry->user_wired_count > 0;
5010
5011 /*
5012 * Remove a kernel wiring if requested
5013 */
5014 if (flags & VM_MAP_REMOVE_KUNWIRE) {
5015 entry->wired_count--;
5016 }
5017
5018 /*
5019 * Remove all user wirings for proper accounting
5020 */
5021 if (entry->user_wired_count > 0) {
5022 while (entry->user_wired_count)
5023 subtract_wire_counts(map, entry, user_wire);
5024 }
5025
5026 if (entry->wired_count != 0) {
5027 assert(map != kernel_map);
5028 /*
5029 * Cannot continue. Typical case is when
5030 * a user thread has physical I/O pending
5031 * on this page. Either wait for the
5032 * kernel wiring to go away or return an
5033 * error.
5034 */
5035 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
5036 wait_result_t wait_result;
5037
5038 assert(s == entry->vme_start);
5039 entry->needs_wakeup = TRUE;
5040 wait_result = vm_map_entry_wait(map,
5041 interruptible);
5042
5043 if (interruptible &&
5044 wait_result == THREAD_INTERRUPTED) {
5045 /*
5046 * We do not clear the
5047 * needs_wakeup flag, since we
5048 * cannot tell if we were the
5049 * only one.
5050 */
5051 vm_map_unlock(map);
5052 return KERN_ABORTED;
5053 }
5054
5055 /*
5056 * The entry could have been clipped or
5057 * it may not exist anymore. Look it
5058 * up again.
5059 */
5060 if (!vm_map_lookup_entry(map, s,
5061 &first_entry)) {
5062 assert(map != kernel_map);
5063 /*
5064 * User: use the next entry
5065 */
5066 entry = first_entry->vme_next;
5067 s = entry->vme_start;
5068 } else {
5069 entry = first_entry;
5070 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5071 }
5072 last_timestamp = map->timestamp;
5073 continue;
5074 }
5075 else {
5076 return KERN_FAILURE;
5077 }
5078 }
5079
5080 entry->in_transition = TRUE;
5081 /*
5082 * copy current entry. see comment in vm_map_wire()
5083 */
5084 tmp_entry = *entry;
5085 assert(s == entry->vme_start);
5086
5087 /*
5088 * We can unlock the map now. The in_transition
5089 * state guarantees the existence of the entry.
5090 */
5091 vm_map_unlock(map);
5092
5093 if (tmp_entry.is_sub_map) {
5094 vm_map_t sub_map;
5095 vm_map_offset_t sub_start, sub_end;
5096 pmap_t pmap;
5097 vm_map_offset_t pmap_addr;
5098
5099
5100 sub_map = tmp_entry.object.sub_map;
5101 sub_start = tmp_entry.offset;
5102 sub_end = sub_start + (tmp_entry.vme_end -
5103 tmp_entry.vme_start);
5104 if (tmp_entry.use_pmap) {
5105 pmap = sub_map->pmap;
5106 pmap_addr = tmp_entry.vme_start;
5107 } else {
5108 pmap = map->pmap;
5109 pmap_addr = tmp_entry.vme_start;
5110 }
5111 (void) vm_map_unwire_nested(sub_map,
5112 sub_start, sub_end,
5113 user_wire,
5114 pmap, pmap_addr);
5115 } else {
5116
5117 vm_fault_unwire(map, &tmp_entry,
5118 tmp_entry.object.vm_object == kernel_object,
5119 map->pmap, tmp_entry.vme_start);
5120 }
5121
5122 vm_map_lock(map);
5123
5124 if (last_timestamp+1 != map->timestamp) {
5125 /*
5126 * Find the entry again. It could have
5127 * been clipped after we unlocked the map.
5128 */
5129 if (!vm_map_lookup_entry(map, s, &first_entry)){
5130 assert((map != kernel_map) &&
5131 (!entry->is_sub_map));
5132 first_entry = first_entry->vme_next;
5133 s = first_entry->vme_start;
5134 } else {
5135 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5136 }
5137 } else {
5138 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5139 first_entry = entry;
5140 }
5141
5142 last_timestamp = map->timestamp;
5143
5144 entry = first_entry;
5145 while ((entry != vm_map_to_entry(map)) &&
5146 (entry->vme_start < tmp_entry.vme_end)) {
5147 assert(entry->in_transition);
5148 entry->in_transition = FALSE;
5149 if (entry->needs_wakeup) {
5150 entry->needs_wakeup = FALSE;
5151 need_wakeup = TRUE;
5152 }
5153 entry = entry->vme_next;
5154 }
5155 /*
5156 * We have unwired the entry(s). Go back and
5157 * delete them.
5158 */
5159 entry = first_entry;
5160 continue;
5161 }
5162
5163 /* entry is unwired */
5164 assert(entry->wired_count == 0);
5165 assert(entry->user_wired_count == 0);
5166
5167 assert(s == entry->vme_start);
5168
5169 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
5170 /*
5171 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
5172 * vm_map_delete(), some map entries might have been
5173 * transferred to a "zap_map", which doesn't have a
5174 * pmap. The original pmap has already been flushed
5175 * in the vm_map_delete() call targeting the original
5176 * map, but when we get to destroying the "zap_map",
5177 * we don't have any pmap to flush, so let's just skip
5178 * all this.
5179 */
5180 } else if (entry->is_sub_map) {
5181 if (entry->use_pmap) {
5182 #ifndef NO_NESTED_PMAP
5183 pmap_unnest(map->pmap,
5184 (addr64_t)entry->vme_start,
5185 entry->vme_end - entry->vme_start);
5186 #endif /* NO_NESTED_PMAP */
5187 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
5188 /* clean up parent map/maps */
5189 vm_map_submap_pmap_clean(
5190 map, entry->vme_start,
5191 entry->vme_end,
5192 entry->object.sub_map,
5193 entry->offset);
5194 }
5195 } else {
5196 vm_map_submap_pmap_clean(
5197 map, entry->vme_start, entry->vme_end,
5198 entry->object.sub_map,
5199 entry->offset);
5200 }
5201 } else if (entry->object.vm_object != kernel_object) {
5202 object = entry->object.vm_object;
5203 if((map->mapped_in_other_pmaps) && (map->ref_count)) {
5204 vm_object_pmap_protect(
5205 object, entry->offset,
5206 entry->vme_end - entry->vme_start,
5207 PMAP_NULL,
5208 entry->vme_start,
5209 VM_PROT_NONE);
5210 } else {
5211 pmap_remove(map->pmap,
5212 (addr64_t)entry->vme_start,
5213 (addr64_t)entry->vme_end);
5214 }
5215 }
5216
5217 /*
5218 * All pmap mappings for this map entry must have been
5219 * cleared by now.
5220 */
5221 assert(vm_map_pmap_is_empty(map,
5222 entry->vme_start,
5223 entry->vme_end));
5224
5225 next = entry->vme_next;
5226 s = next->vme_start;
5227 last_timestamp = map->timestamp;
5228
5229 if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
5230 zap_map != VM_MAP_NULL) {
5231 vm_map_size_t entry_size;
5232 /*
5233 * The caller wants to save the affected VM map entries
5234 * into the "zap_map". The caller will take care of
5235 * these entries.
5236 */
5237 /* unlink the entry from "map" ... */
5238 vm_map_store_entry_unlink(map, entry);
5239 /* ... and add it to the end of the "zap_map" */
5240 vm_map_store_entry_link(zap_map,
5241 vm_map_last_entry(zap_map),
5242 entry);
5243 entry_size = entry->vme_end - entry->vme_start;
5244 map->size -= entry_size;
5245 zap_map->size += entry_size;
5246 /* we didn't unlock the map, so no timestamp increase */
5247 last_timestamp--;
5248 } else {
5249 vm_map_entry_delete(map, entry);
5250 /* vm_map_entry_delete unlocks the map */
5251 vm_map_lock(map);
5252 }
5253
5254 entry = next;
5255
5256 if(entry == vm_map_to_entry(map)) {
5257 break;
5258 }
5259 if (last_timestamp+1 != map->timestamp) {
5260 /*
5261 * We are responsible for deleting everything
5262 * in the given range. If someone has interfered,
5263 * we pick up where we left off. Back-fills should
5264 * be all right for anyone except vm_map_delete(),
5265 * and we have to assume that the task has been
5266 * fully disabled before we get here.
5267 */
5268 if (!vm_map_lookup_entry(map, s, &entry)){
5269 entry = entry->vme_next;
5270 s = entry->vme_start;
5271 } else {
5272 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5273 }
5274 /*
5275 * Others can not only allocate behind us; entries can
5276 * also coalesce while we don't hold the map lock.
5277 */
5278 if(entry == vm_map_to_entry(map)) {
5279 break;
5280 }
5281 }
5282 last_timestamp = map->timestamp;
5283 }
5284
5285 if (map->wait_for_space)
5286 thread_wakeup((event_t) map);
5287 /*
5288 * wake up anybody waiting on entries that we have already deleted.
5289 */
5290 if (need_wakeup)
5291 vm_map_entry_wakeup(map);
5292
5293 return KERN_SUCCESS;
5294 }
5295
5296 /*
5297 * vm_map_remove:
5298 *
5299 * Remove the given address range from the target map.
5300 * This is the exported form of vm_map_delete.
5301 */
5302 kern_return_t
5303 vm_map_remove(
5304 register vm_map_t map,
5305 register vm_map_offset_t start,
5306 register vm_map_offset_t end,
5307 register boolean_t flags)
5308 {
5309 register kern_return_t result;
5310
5311 vm_map_lock(map);
5312 VM_MAP_RANGE_CHECK(map, start, end);
5313 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
5314 vm_map_unlock(map);
5315
5316 return(result);
5317 }
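/*
 * Editorial sketch (not part of the original source): a hypothetical
 * caller removing a range it had wired into the kernel map.  Passing
 * VM_MAP_REMOVE_KUNWIRE lets vm_map_delete() drop the one kernel
 * wiring instead of waiting for it to disappear (see the header
 * comment above vm_map_delete()).
 *
 *	kr = vm_map_remove(kernel_map,
 *			   vm_map_trunc_page(addr),
 *			   vm_map_round_page(addr + size),
 *			   VM_MAP_REMOVE_KUNWIRE);
 */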
5318
5319
5320 /*
5321 * Routine: vm_map_copy_discard
5322 *
5323 * Description:
5324 * Dispose of a map copy object (returned by
5325 * vm_map_copyin).
5326 */
5327 void
5328 vm_map_copy_discard(
5329 vm_map_copy_t copy)
5330 {
5331 if (copy == VM_MAP_COPY_NULL)
5332 return;
5333
5334 switch (copy->type) {
5335 case VM_MAP_COPY_ENTRY_LIST:
5336 while (vm_map_copy_first_entry(copy) !=
5337 vm_map_copy_to_entry(copy)) {
5338 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
5339
5340 vm_map_copy_entry_unlink(copy, entry);
5341 vm_object_deallocate(entry->object.vm_object);
5342 vm_map_copy_entry_dispose(copy, entry);
5343 }
5344 break;
5345 case VM_MAP_COPY_OBJECT:
5346 vm_object_deallocate(copy->cpy_object);
5347 break;
5348 case VM_MAP_COPY_KERNEL_BUFFER:
5349
5350 /*
5351 * The vm_map_copy_t and possibly the data buffer were
5352 * allocated by a single call to kalloc(), i.e. the
5353 * vm_map_copy_t was not allocated out of the zone.
5354 */
5355 kfree(copy, copy->cpy_kalloc_size);
5356 return;
5357 }
5358 zfree(vm_map_copy_zone, copy);
5359 }
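/*
 * Editorial sketch (not part of the original source): the ownership
 * rule vm_map_copy_discard() exists to serve.  A hypothetical routine
 * that creates a copy object and then fails must discard it itself;
 * once a consumer (vm_map_copyout(), vm_map_copy_overwrite(), ...)
 * succeeds, the copy object has been consumed and must not be
 * discarded again.
 *
 *	vm_map_copy_t copy;
 *
 *	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
 *	if (kr != KERN_SUCCESS)
 *		return kr;
 *	kr = consume_somehow(copy);		// hypothetical consumer
 *	if (kr != KERN_SUCCESS)
 *		vm_map_copy_discard(copy);	// still ours on failure
 *	return kr;
 */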
5360
5361 /*
5362 * Routine: vm_map_copy_copy
5363 *
5364 * Description:
5365 * Move the information in a map copy object to
5366 * a new map copy object, leaving the old one
5367 * empty.
5368 *
5369 * This is used by kernel routines that need
5370 * to look at out-of-line data (in copyin form)
5371 * before deciding whether to return SUCCESS.
5372 * If the routine returns FAILURE, the original
5373 * copy object will be deallocated; therefore,
5374 * these routines must make a copy of the copy
5375 * object and leave the original empty so that
5376 * deallocation will not fail.
5377 */
5378 vm_map_copy_t
5379 vm_map_copy_copy(
5380 vm_map_copy_t copy)
5381 {
5382 vm_map_copy_t new_copy;
5383
5384 if (copy == VM_MAP_COPY_NULL)
5385 return VM_MAP_COPY_NULL;
5386
5387 /*
5388 * Allocate a new copy object, and copy the information
5389 * from the old one into it.
5390 */
5391
5392 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
5393 *new_copy = *copy;
5394
5395 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
5396 /*
5397 * The links in the entry chain must be
5398 * changed to point to the new copy object.
5399 */
5400 vm_map_copy_first_entry(copy)->vme_prev
5401 = vm_map_copy_to_entry(new_copy);
5402 vm_map_copy_last_entry(copy)->vme_next
5403 = vm_map_copy_to_entry(new_copy);
5404 }
5405
5406 /*
5407 * Change the old copy object into one that contains
5408 * nothing to be deallocated.
5409 */
5410 copy->type = VM_MAP_COPY_OBJECT;
5411 copy->cpy_object = VM_OBJECT_NULL;
5412
5413 /*
5414 * Return the new object.
5415 */
5416 return new_copy;
5417 }
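/*
 * Editorial sketch (not part of the original source): the pattern the
 * header comment above describes, using a hypothetical validation
 * step.  By moving the contents into "my_copy" first, a later failure
 * return leaves the caller free to deallocate the original, now
 * empty, copy object without touching the real data.
 *
 *	vm_map_copy_t my_copy;
 *
 *	my_copy = vm_map_copy_copy(copy);	// original is now empty
 *	if (!looks_valid(my_copy)) {		// hypothetical check
 *		vm_map_copy_discard(my_copy);
 *		return KERN_INVALID_ARGUMENT;	// caller may discard
 *	}					// "copy" harmlessly
 *	... go on to consume "my_copy" ...
 */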
5418
5419 static kern_return_t
5420 vm_map_overwrite_submap_recurse(
5421 vm_map_t dst_map,
5422 vm_map_offset_t dst_addr,
5423 vm_map_size_t dst_size)
5424 {
5425 vm_map_offset_t dst_end;
5426 vm_map_entry_t tmp_entry;
5427 vm_map_entry_t entry;
5428 kern_return_t result;
5429 boolean_t encountered_sub_map = FALSE;
5430
5431
5432
5433 /*
5434 * Verify that the destination is all writeable
5435 * initially. We have to trunc the destination
5436 * address and round the copy size or we'll end up
5437 * splitting entries in strange ways.
5438 */
5439
5440 dst_end = vm_map_round_page(dst_addr + dst_size);
5441 vm_map_lock(dst_map);
5442
5443 start_pass_1:
5444 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5445 vm_map_unlock(dst_map);
5446 return(KERN_INVALID_ADDRESS);
5447 }
5448
5449 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
5450 assert(!tmp_entry->use_pmap); /* clipping did unnest if needed */
5451
5452 for (entry = tmp_entry;;) {
5453 vm_map_entry_t next;
5454
5455 next = entry->vme_next;
5456 while(entry->is_sub_map) {
5457 vm_map_offset_t sub_start;
5458 vm_map_offset_t sub_end;
5459 vm_map_offset_t local_end;
5460
5461 if (entry->in_transition) {
5462 /*
5463 * Say that we are waiting, and wait for entry.
5464 */
5465 entry->needs_wakeup = TRUE;
5466 vm_map_entry_wait(dst_map, THREAD_UNINT);
5467
5468 goto start_pass_1;
5469 }
5470
5471 encountered_sub_map = TRUE;
5472 sub_start = entry->offset;
5473
5474 if(entry->vme_end < dst_end)
5475 sub_end = entry->vme_end;
5476 else
5477 sub_end = dst_end;
5478 sub_end -= entry->vme_start;
5479 sub_end += entry->offset;
5480 local_end = entry->vme_end;
5481 vm_map_unlock(dst_map);
5482
5483 result = vm_map_overwrite_submap_recurse(
5484 entry->object.sub_map,
5485 sub_start,
5486 sub_end - sub_start);
5487
5488 if(result != KERN_SUCCESS)
5489 return result;
5490 if (dst_end <= entry->vme_end)
5491 return KERN_SUCCESS;
5492 vm_map_lock(dst_map);
5493 if(!vm_map_lookup_entry(dst_map, local_end,
5494 &tmp_entry)) {
5495 vm_map_unlock(dst_map);
5496 return(KERN_INVALID_ADDRESS);
5497 }
5498 entry = tmp_entry;
5499 next = entry->vme_next;
5500 }
5501
5502 if ( ! (entry->protection & VM_PROT_WRITE)) {
5503 vm_map_unlock(dst_map);
5504 return(KERN_PROTECTION_FAILURE);
5505 }
5506
5507 /*
5508 * If the entry is in transition, we must wait
5509 * for it to exit that state. Anything could happen
5510 * when we unlock the map, so start over.
5511 */
5512 if (entry->in_transition) {
5513
5514 /*
5515 * Say that we are waiting, and wait for entry.
5516 */
5517 entry->needs_wakeup = TRUE;
5518 vm_map_entry_wait(dst_map, THREAD_UNINT);
5519
5520 goto start_pass_1;
5521 }
5522
5523 /*
5524 * our range is contained completely within this map entry
5525 */
5526 if (dst_end <= entry->vme_end) {
5527 vm_map_unlock(dst_map);
5528 return KERN_SUCCESS;
5529 }
5530 /*
5531 * check that range specified is contiguous region
5532 */
5533 if ((next == vm_map_to_entry(dst_map)) ||
5534 (next->vme_start != entry->vme_end)) {
5535 vm_map_unlock(dst_map);
5536 return(KERN_INVALID_ADDRESS);
5537 }
5538
5539 /*
5540 * Check for permanent objects in the destination.
5541 */
5542 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5543 ((!entry->object.vm_object->internal) ||
5544 (entry->object.vm_object->true_share))) {
5545 if(encountered_sub_map) {
5546 vm_map_unlock(dst_map);
5547 return(KERN_FAILURE);
5548 }
5549 }
5550
5551
5552 entry = next;
5553 }/* for */
5554 vm_map_unlock(dst_map);
5555 return(KERN_SUCCESS);
5556 }
5557
5558 /*
5559 * Routine: vm_map_copy_overwrite
5560 *
5561 * Description:
5562 * Copy the memory described by the map copy
5563 * object (copy; returned by vm_map_copyin) onto
5564 * the specified destination region (dst_map, dst_addr).
5565 * The destination must be writeable.
5566 *
5567 * Unlike vm_map_copyout, this routine actually
5568 * writes over previously-mapped memory. If the
5569 * previous mapping was to a permanent (user-supplied)
5570 * memory object, it is preserved.
5571 *
5572 * The attributes (protection and inheritance) of the
5573 * destination region are preserved.
5574 *
5575 * If successful, consumes the copy object.
5576 * Otherwise, the caller is responsible for it.
5577 *
5578 * Implementation notes:
5579 * To overwrite aligned temporary virtual memory, it is
5580 * sufficient to remove the previous mapping and insert
5581 * the new copy. This replacement is done either on
5582 * the whole region (if no permanent virtual memory
5583 * objects are embedded in the destination region) or
5584 * in individual map entries.
5585 *
5586 * To overwrite permanent virtual memory, it is necessary
5587 * to copy each page, as the external memory management
5588 * interface currently does not provide any optimizations.
5589 *
5590 * Unaligned memory also has to be copied. It is possible
5591 * to use 'vm_trickery' to copy the aligned data. This is
5592 * not done but not hard to implement.
5593 *
5594 * Once a page of permanent memory has been overwritten,
5595 * it is impossible to interrupt this function; otherwise,
5596 * the call would be neither atomic nor location-independent.
5597 * The kernel-state portion of a user thread must be
5598 * interruptible.
5599 *
5600 * It may be expensive to forward all requests that might
5601 * overwrite permanent memory (vm_write, vm_copy) to
5602 * uninterruptible kernel threads. This routine may be
5603 * called by interruptible threads; however, success is
5604 * not guaranteed -- if the request cannot be performed
5605 * atomically and interruptibly, an error indication is
5606 * returned.
5607 */
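/*
 * Editorial sketch (not part of the original source): the vm_write-
 * style sequence this routine serves, with hypothetical maps,
 * addresses and size.  The exported vm_map_copy_overwrite() wrapper
 * consumes the copy object on success; on failure the caller still
 * owns it.
 *
 *	kr = vm_map_copyin(src_map, src_addr, size, FALSE, &copy);
 *	if (kr == KERN_SUCCESS) {
 *		kr = vm_map_copy_overwrite(dst_map, dst_addr,
 *					   copy, TRUE);	// interruptible
 *		if (kr != KERN_SUCCESS)
 *			vm_map_copy_discard(copy);
 *	}
 */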
5608
5609 static kern_return_t
5610 vm_map_copy_overwrite_nested(
5611 vm_map_t dst_map,
5612 vm_map_address_t dst_addr,
5613 vm_map_copy_t copy,
5614 boolean_t interruptible,
5615 pmap_t pmap,
5616 boolean_t discard_on_success)
5617 {
5618 vm_map_offset_t dst_end;
5619 vm_map_entry_t tmp_entry;
5620 vm_map_entry_t entry;
5621 kern_return_t kr;
5622 boolean_t aligned = TRUE;
5623 boolean_t contains_permanent_objects = FALSE;
5624 boolean_t encountered_sub_map = FALSE;
5625 vm_map_offset_t base_addr;
5626 vm_map_size_t copy_size;
5627 vm_map_size_t total_size;
5628
5629
5630 /*
5631 * Check for null copy object.
5632 */
5633
5634 if (copy == VM_MAP_COPY_NULL)
5635 return(KERN_SUCCESS);
5636
5637 /*
5638 * Check for special kernel buffer allocated
5639 * by new_ipc_kmsg_copyin.
5640 */
5641
5642 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
5643 return(vm_map_copyout_kernel_buffer(
5644 dst_map, &dst_addr,
5645 copy, TRUE));
5646 }
5647
5648 /*
5649 * Only works for entry lists at the moment. Will
5650 * support page lists later.
5651 */
5652
5653 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
5654
5655 if (copy->size == 0) {
5656 if (discard_on_success)
5657 vm_map_copy_discard(copy);
5658 return(KERN_SUCCESS);
5659 }
5660
5661 /*
5662 * Verify that the destination is all writeable
5663 * initially. We have to trunc the destination
5664 * address and round the copy size or we'll end up
5665 * splitting entries in strange ways.
5666 */
5667
5668 if (!page_aligned(copy->size) ||
5669 !page_aligned (copy->offset) ||
5670 !page_aligned (dst_addr))
5671 {
5672 aligned = FALSE;
5673 dst_end = vm_map_round_page(dst_addr + copy->size);
5674 } else {
5675 dst_end = dst_addr + copy->size;
5676 }
5677
5678 vm_map_lock(dst_map);
5679
5680 /* LP64todo - remove this check when vm_map_commpage64()
5681 * no longer has to stuff in a map_entry for the commpage
5682 * above the map's max_offset.
5683 */
5684 if (dst_addr >= dst_map->max_offset) {
5685 vm_map_unlock(dst_map);
5686 return(KERN_INVALID_ADDRESS);
5687 }
5688
5689 start_pass_1:
5690 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
5691 vm_map_unlock(dst_map);
5692 return(KERN_INVALID_ADDRESS);
5693 }
5694 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr));
5695 for (entry = tmp_entry;;) {
5696 vm_map_entry_t next = entry->vme_next;
5697
5698 while(entry->is_sub_map) {
5699 vm_map_offset_t sub_start;
5700 vm_map_offset_t sub_end;
5701 vm_map_offset_t local_end;
5702
5703 if (entry->in_transition) {
5704
5705 /*
5706 * Say that we are waiting, and wait for entry.
5707 */
5708 entry->needs_wakeup = TRUE;
5709 vm_map_entry_wait(dst_map, THREAD_UNINT);
5710
5711 goto start_pass_1;
5712 }
5713
5714 local_end = entry->vme_end;
5715 if (!(entry->needs_copy)) {
5716 /* if needs_copy we are a COW submap */
5717 /* in such a case we just replace so */
5718 /* there is no need for the */
5719 /* following check. */
5720 encountered_sub_map = TRUE;
5721 sub_start = entry->offset;
5722
5723 if(entry->vme_end < dst_end)
5724 sub_end = entry->vme_end;
5725 else
5726 sub_end = dst_end;
5727 sub_end -= entry->vme_start;
5728 sub_end += entry->offset;
5729 vm_map_unlock(dst_map);
5730
5731 kr = vm_map_overwrite_submap_recurse(
5732 entry->object.sub_map,
5733 sub_start,
5734 sub_end - sub_start);
5735 if(kr != KERN_SUCCESS)
5736 return kr;
5737 vm_map_lock(dst_map);
5738 }
5739
5740 if (dst_end <= entry->vme_end)
5741 goto start_overwrite;
5742 if(!vm_map_lookup_entry(dst_map, local_end,
5743 &entry)) {
5744 vm_map_unlock(dst_map);
5745 return(KERN_INVALID_ADDRESS);
5746 }
5747 next = entry->vme_next;
5748 }
5749
5750 if ( ! (entry->protection & VM_PROT_WRITE)) {
5751 vm_map_unlock(dst_map);
5752 return(KERN_PROTECTION_FAILURE);
5753 }
5754
5755 /*
5756 * If the entry is in transition, we must wait
5757 * for it to exit that state. Anything could happen
5758 * when we unlock the map, so start over.
5759 */
5760 if (entry->in_transition) {
5761
5762 /*
5763 * Say that we are waiting, and wait for entry.
5764 */
5765 entry->needs_wakeup = TRUE;
5766 vm_map_entry_wait(dst_map, THREAD_UNINT);
5767
5768 goto start_pass_1;
5769 }
5770
5771 /*
5772 * our range is contained completely within this map entry
5773 */
5774 if (dst_end <= entry->vme_end)
5775 break;
5776 /*
5777 * check that range specified is contiguous region
5778 */
5779 if ((next == vm_map_to_entry(dst_map)) ||
5780 (next->vme_start != entry->vme_end)) {
5781 vm_map_unlock(dst_map);
5782 return(KERN_INVALID_ADDRESS);
5783 }
5784
5785
5786 /*
5787 * Check for permanent objects in the destination.
5788 */
5789 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
5790 ((!entry->object.vm_object->internal) ||
5791 (entry->object.vm_object->true_share))) {
5792 contains_permanent_objects = TRUE;
5793 }
5794
5795 entry = next;
5796 }/* for */
5797
5798 start_overwrite:
5799 /*
5800 * If there are permanent objects in the destination, then
5801 * the copy cannot be interrupted.
5802 */
5803
5804 if (interruptible && contains_permanent_objects) {
5805 vm_map_unlock(dst_map);
5806 return(KERN_FAILURE); /* XXX */
5807 }
5808
5809 /*
5810 *
5811 * Make a second pass, overwriting the data
5812 * At the beginning of each loop iteration,
5813 * the next entry to be overwritten is "tmp_entry"
5814 * (initially, the value returned from the lookup above),
5815 * and the starting address expected in that entry
5816 * is "start".
5817 */
5818
5819 total_size = copy->size;
5820 if(encountered_sub_map) {
5821 copy_size = 0;
5822 /* re-calculate tmp_entry since we've had the map */
5823 /* unlocked */
5824 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
5825 vm_map_unlock(dst_map);
5826 return(KERN_INVALID_ADDRESS);
5827 }
5828 } else {
5829 copy_size = copy->size;
5830 }
5831
5832 base_addr = dst_addr;
5833 while(TRUE) {
5834 /* deconstruct the copy object and do it in parts, */
5835 /* only in the sub_map, interruptible case */
5836 vm_map_entry_t copy_entry;
5837 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
5838 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
5839 int nentries;
5840 int remaining_entries = 0;
5841 vm_map_offset_t new_offset = 0;
5842
5843 for (entry = tmp_entry; copy_size == 0;) {
5844 vm_map_entry_t next;
5845
5846 next = entry->vme_next;
5847
5848 /* tmp_entry and base address are moved along */
5849 /* each time we encounter a sub-map. Otherwise */
5850 /* entry can outpace tmp_entry, and the copy_size */
5851 /* may reflect the distance between them */
5852 /* If the current entry is found to be in transition, */
5853 /* we will start over at the beginning or at the last */
5854 /* encounter of a submap, as dictated by base_addr, */
5855 /* and we will zero copy_size accordingly. */
5856 if (entry->in_transition) {
5857 /*
5858 * Say that we are waiting, and wait for entry.
5859 */
5860 entry->needs_wakeup = TRUE;
5861 vm_map_entry_wait(dst_map, THREAD_UNINT);
5862
5863 if(!vm_map_lookup_entry(dst_map, base_addr,
5864 &tmp_entry)) {
5865 vm_map_unlock(dst_map);
5866 return(KERN_INVALID_ADDRESS);
5867 }
5868 copy_size = 0;
5869 entry = tmp_entry;
5870 continue;
5871 }
5872 if(entry->is_sub_map) {
5873 vm_map_offset_t sub_start;
5874 vm_map_offset_t sub_end;
5875 vm_map_offset_t local_end;
5876
5877 if (entry->needs_copy) {
5878 /* if this is a COW submap */
5879 /* just back the range with an */
5880 /* anonymous entry */
5881 if(entry->vme_end < dst_end)
5882 sub_end = entry->vme_end;
5883 else
5884 sub_end = dst_end;
5885 if(entry->vme_start < base_addr)
5886 sub_start = base_addr;
5887 else
5888 sub_start = entry->vme_start;
5889 vm_map_clip_end(
5890 dst_map, entry, sub_end);
5891 vm_map_clip_start(
5892 dst_map, entry, sub_start);
5893 assert(!entry->use_pmap);
5894 entry->is_sub_map = FALSE;
5895 vm_map_deallocate(
5896 entry->object.sub_map);
5897 entry->object.sub_map = NULL;
5898 entry->is_shared = FALSE;
5899 entry->needs_copy = FALSE;
5900 entry->offset = 0;
5901 /*
5902 * XXX FBDP
5903 * We should propagate the protections
5904 * of the submap entry here instead
5905 * of forcing them to VM_PROT_ALL...
5906 * Or better yet, we should inherit
5907 * the protection of the copy_entry.
5908 */
5909 entry->protection = VM_PROT_ALL;
5910 entry->max_protection = VM_PROT_ALL;
5911 entry->wired_count = 0;
5912 entry->user_wired_count = 0;
5913 if(entry->inheritance
5914 == VM_INHERIT_SHARE)
5915 entry->inheritance = VM_INHERIT_COPY;
5916 continue;
5917 }
5918 /* first take care of any non-sub_map */
5919 /* entries to send */
5920 if(base_addr < entry->vme_start) {
5921 /* stuff to send */
5922 copy_size =
5923 entry->vme_start - base_addr;
5924 break;
5925 }
5926 sub_start = entry->offset;
5927
5928 if(entry->vme_end < dst_end)
5929 sub_end = entry->vme_end;
5930 else
5931 sub_end = dst_end;
5932 sub_end -= entry->vme_start;
5933 sub_end += entry->offset;
5934 local_end = entry->vme_end;
5935 vm_map_unlock(dst_map);
5936 copy_size = sub_end - sub_start;
5937
5938 /* adjust the copy object */
5939 if (total_size > copy_size) {
5940 vm_map_size_t local_size = 0;
5941 vm_map_size_t entry_size;
5942
5943 nentries = 1;
5944 new_offset = copy->offset;
5945 copy_entry = vm_map_copy_first_entry(copy);
5946 while(copy_entry !=
5947 vm_map_copy_to_entry(copy)){
5948 entry_size = copy_entry->vme_end -
5949 copy_entry->vme_start;
5950 if((local_size < copy_size) &&
5951 ((local_size + entry_size)
5952 >= copy_size)) {
5953 vm_map_copy_clip_end(copy,
5954 copy_entry,
5955 copy_entry->vme_start +
5956 (copy_size - local_size));
5957 entry_size = copy_entry->vme_end -
5958 copy_entry->vme_start;
5959 local_size += entry_size;
5960 new_offset += entry_size;
5961 }
5962 if(local_size >= copy_size) {
5963 next_copy = copy_entry->vme_next;
5964 copy_entry->vme_next =
5965 vm_map_copy_to_entry(copy);
5966 previous_prev =
5967 copy->cpy_hdr.links.prev;
5968 copy->cpy_hdr.links.prev = copy_entry;
5969 copy->size = copy_size;
5970 remaining_entries =
5971 copy->cpy_hdr.nentries;
5972 remaining_entries -= nentries;
5973 copy->cpy_hdr.nentries = nentries;
5974 break;
5975 } else {
5976 local_size += entry_size;
5977 new_offset += entry_size;
5978 nentries++;
5979 }
5980 copy_entry = copy_entry->vme_next;
5981 }
5982 }
5983
5984 if((entry->use_pmap) && (pmap == NULL)) {
5985 kr = vm_map_copy_overwrite_nested(
5986 entry->object.sub_map,
5987 sub_start,
5988 copy,
5989 interruptible,
5990 entry->object.sub_map->pmap,
5991 TRUE);
5992 } else if (pmap != NULL) {
5993 kr = vm_map_copy_overwrite_nested(
5994 entry->object.sub_map,
5995 sub_start,
5996 copy,
5997 interruptible, pmap,
5998 TRUE);
5999 } else {
6000 kr = vm_map_copy_overwrite_nested(
6001 entry->object.sub_map,
6002 sub_start,
6003 copy,
6004 interruptible,
6005 dst_map->pmap,
6006 TRUE);
6007 }
6008 if(kr != KERN_SUCCESS) {
6009 if(next_copy != NULL) {
6010 copy->cpy_hdr.nentries +=
6011 remaining_entries;
6012 copy->cpy_hdr.links.prev->vme_next =
6013 next_copy;
6014 copy->cpy_hdr.links.prev
6015 = previous_prev;
6016 copy->size = total_size;
6017 }
6018 return kr;
6019 }
6020 if (dst_end <= local_end) {
6021 return(KERN_SUCCESS);
6022 }
6023 /* otherwise copy no longer exists, it was */
6024 /* destroyed after successful copy_overwrite */
6025 copy = (vm_map_copy_t)
6026 zalloc(vm_map_copy_zone);
6027 vm_map_copy_first_entry(copy) =
6028 vm_map_copy_last_entry(copy) =
6029 vm_map_copy_to_entry(copy);
6030 copy->type = VM_MAP_COPY_ENTRY_LIST;
6031 copy->offset = new_offset;
6032
6033 /*
6034 * XXX FBDP
6035 * this does not seem to deal with
6036 * the VM map store (R&B tree)
6037 */
6038
6039 total_size -= copy_size;
6040 copy_size = 0;
6041 /* put back remainder of copy in container */
6042 if(next_copy != NULL) {
6043 copy->cpy_hdr.nentries = remaining_entries;
6044 copy->cpy_hdr.links.next = next_copy;
6045 copy->cpy_hdr.links.prev = previous_prev;
6046 copy->size = total_size;
6047 next_copy->vme_prev =
6048 vm_map_copy_to_entry(copy);
6049 next_copy = NULL;
6050 }
6051 base_addr = local_end;
6052 vm_map_lock(dst_map);
6053 if(!vm_map_lookup_entry(dst_map,
6054 local_end, &tmp_entry)) {
6055 vm_map_unlock(dst_map);
6056 return(KERN_INVALID_ADDRESS);
6057 }
6058 entry = tmp_entry;
6059 continue;
6060 }
6061 if (dst_end <= entry->vme_end) {
6062 copy_size = dst_end - base_addr;
6063 break;
6064 }
6065
6066 if ((next == vm_map_to_entry(dst_map)) ||
6067 (next->vme_start != entry->vme_end)) {
6068 vm_map_unlock(dst_map);
6069 return(KERN_INVALID_ADDRESS);
6070 }
6071
6072 entry = next;
6073 }/* for */
6074
6075 next_copy = NULL;
6076 nentries = 1;
6077
6078 /* adjust the copy object */
6079 if (total_size > copy_size) {
6080 vm_map_size_t local_size = 0;
6081 vm_map_size_t entry_size;
6082
6083 new_offset = copy->offset;
6084 copy_entry = vm_map_copy_first_entry(copy);
6085 while(copy_entry != vm_map_copy_to_entry(copy)) {
6086 entry_size = copy_entry->vme_end -
6087 copy_entry->vme_start;
6088 if((local_size < copy_size) &&
6089 ((local_size + entry_size)
6090 >= copy_size)) {
6091 vm_map_copy_clip_end(copy, copy_entry,
6092 copy_entry->vme_start +
6093 (copy_size - local_size));
6094 entry_size = copy_entry->vme_end -
6095 copy_entry->vme_start;
6096 local_size += entry_size;
6097 new_offset += entry_size;
6098 }
6099 if(local_size >= copy_size) {
6100 next_copy = copy_entry->vme_next;
6101 copy_entry->vme_next =
6102 vm_map_copy_to_entry(copy);
6103 previous_prev =
6104 copy->cpy_hdr.links.prev;
6105 copy->cpy_hdr.links.prev = copy_entry;
6106 copy->size = copy_size;
6107 remaining_entries =
6108 copy->cpy_hdr.nentries;
6109 remaining_entries -= nentries;
6110 copy->cpy_hdr.nentries = nentries;
6111 break;
6112 } else {
6113 local_size += entry_size;
6114 new_offset += entry_size;
6115 nentries++;
6116 }
6117 copy_entry = copy_entry->vme_next;
6118 }
6119 }
6120
6121 if (aligned) {
6122 pmap_t local_pmap;
6123
6124 if(pmap)
6125 local_pmap = pmap;
6126 else
6127 local_pmap = dst_map->pmap;
6128
6129 if ((kr = vm_map_copy_overwrite_aligned(
6130 dst_map, tmp_entry, copy,
6131 base_addr, local_pmap)) != KERN_SUCCESS) {
6132 if(next_copy != NULL) {
6133 copy->cpy_hdr.nentries +=
6134 remaining_entries;
6135 copy->cpy_hdr.links.prev->vme_next =
6136 next_copy;
6137 copy->cpy_hdr.links.prev =
6138 previous_prev;
6139 copy->size += copy_size;
6140 }
6141 return kr;
6142 }
6143 vm_map_unlock(dst_map);
6144 } else {
6145 /*
6146 * Performance gain:
6147 *
6148 * if the copy and dst address are misaligned but share the same
6149 * offset within the page, we can copy_not_aligned the
6150 * misaligned parts and copy aligned the rest. If they are
6151 * aligned but len is unaligned, we simply need to copy
6152 * the end bit unaligned. We'll need to split the misaligned
6153 * bits of the region in this case!
6154 */
6155 /* ALWAYS UNLOCKS THE dst_map MAP */
6156 if ((kr = vm_map_copy_overwrite_unaligned( dst_map,
6157 tmp_entry, copy, base_addr)) != KERN_SUCCESS) {
6158 if(next_copy != NULL) {
6159 copy->cpy_hdr.nentries +=
6160 remaining_entries;
6161 copy->cpy_hdr.links.prev->vme_next =
6162 next_copy;
6163 copy->cpy_hdr.links.prev =
6164 previous_prev;
6165 copy->size += copy_size;
6166 }
6167 return kr;
6168 }
6169 }
6170 total_size -= copy_size;
6171 if(total_size == 0)
6172 break;
6173 base_addr += copy_size;
6174 copy_size = 0;
6175 copy->offset = new_offset;
6176 if(next_copy != NULL) {
6177 copy->cpy_hdr.nentries = remaining_entries;
6178 copy->cpy_hdr.links.next = next_copy;
6179 copy->cpy_hdr.links.prev = previous_prev;
6180 next_copy->vme_prev = vm_map_copy_to_entry(copy);
6181 copy->size = total_size;
6182 }
6183 vm_map_lock(dst_map);
6184 while(TRUE) {
6185 if (!vm_map_lookup_entry(dst_map,
6186 base_addr, &tmp_entry)) {
6187 vm_map_unlock(dst_map);
6188 return(KERN_INVALID_ADDRESS);
6189 }
6190 if (tmp_entry->in_transition) {
6191 entry->needs_wakeup = TRUE;
6192 vm_map_entry_wait(dst_map, THREAD_UNINT);
6193 } else {
6194 break;
6195 }
6196 }
6197 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(base_addr));
6198
6199 entry = tmp_entry;
6200 } /* while */
6201
6202 /*
6203 * Throw away the vm_map_copy object
6204 */
6205 if (discard_on_success)
6206 vm_map_copy_discard(copy);
6207
6208 return(KERN_SUCCESS);
6209 }/* vm_map_copy_overwrite */
6210
6211 kern_return_t
6212 vm_map_copy_overwrite(
6213 vm_map_t dst_map,
6214 vm_map_offset_t dst_addr,
6215 vm_map_copy_t copy,
6216 boolean_t interruptible)
6217 {
6218 vm_map_size_t head_size, tail_size;
6219 vm_map_copy_t head_copy, tail_copy;
6220 vm_map_offset_t head_addr, tail_addr;
6221 vm_map_entry_t entry;
6222 kern_return_t kr;
6223
6224 head_size = 0;
6225 tail_size = 0;
6226 head_copy = NULL;
6227 tail_copy = NULL;
6228 head_addr = 0;
6229 tail_addr = 0;
6230
6231 if (interruptible ||
6232 copy == VM_MAP_COPY_NULL ||
6233 copy->type != VM_MAP_COPY_ENTRY_LIST) {
6234 /*
6235 * We can't split the "copy" map if we're interruptible
6236 * or if we don't have a "copy" map...
6237 */
6238 blunt_copy:
6239 return vm_map_copy_overwrite_nested(dst_map,
6240 dst_addr,
6241 copy,
6242 interruptible,
6243 (pmap_t) NULL,
6244 TRUE);
6245 }
6246
6247 if (copy->size < 3 * PAGE_SIZE) {
6248 /*
6249 * Too small to bother with optimizing...
6250 */
6251 goto blunt_copy;
6252 }
6253
6254 if ((dst_addr & PAGE_MASK) != (copy->offset & PAGE_MASK)) {
6255 /*
6256 * Incompatible mis-alignment of source and destination...
6257 */
6258 goto blunt_copy;
6259 }
6260
6261 /*
6262 * Proper alignment or identical mis-alignment at the beginning.
6263 * Let's try and do a small unaligned copy first (if needed)
6264 * and then an aligned copy for the rest.
6265 */
6266 if (!page_aligned(dst_addr)) {
6267 head_addr = dst_addr;
6268 head_size = PAGE_SIZE - (copy->offset & PAGE_MASK);
6269 }
6270 if (!page_aligned(copy->offset + copy->size)) {
6271 /*
6272 * Mis-alignment at the end.
6273 * Do an aligned copy up to the last page and
6274 * then an unaligned copy for the remaining bytes.
6275 */
6276 tail_size = (copy->offset + copy->size) & PAGE_MASK;
6277 tail_addr = dst_addr + copy->size - tail_size;
6278 }
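/*
 * Worked example of the head/tail split above (values assumed,
 * illustrative only, with PAGE_SIZE = 0x1000):
 * for dst_addr = 0x5400, copy->offset = 0x3400 and
 * copy->size = 0x3900, the head copy covers [0x5400, 0x6000),
 * since head_size = 0x1000 - (0x3400 & 0xfff) = 0xc00;
 * the tail copy covers [0x8000, 0x8d00), since
 * tail_size = (0x3400 + 0x3900) & 0xfff = 0xd00 and
 * tail_addr = 0x5400 + 0x3900 - 0xd00 = 0x8000;
 * the aligned copy in between then handles [0x6000, 0x8000).
 */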
6279
6280 if (head_size + tail_size == copy->size) {
6281 /*
6282 * It's all unaligned, no optimization possible...
6283 */
6284 goto blunt_copy;
6285 }
6286
6287 /*
6288 * Can't optimize if there are any submaps in the
6289 * destination due to the way we free the "copy" map
6290 * progressively in vm_map_copy_overwrite_nested()
6291 * in that case.
6292 */
6293 vm_map_lock_read(dst_map);
6294 if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
6295 vm_map_unlock_read(dst_map);
6296 goto blunt_copy;
6297 }
6298 for (;
6299 (entry != vm_map_to_entry(dst_map) &&
6300 entry->vme_start < dst_addr + copy->size);
6301 entry = entry->vme_next) {
6302 if (entry->is_sub_map) {
6303 vm_map_unlock_read(dst_map);
6304 goto blunt_copy;
6305 }
6306 }
6307 vm_map_unlock_read(dst_map);
6308
6309 if (head_size) {
6310 /*
6311 * Unaligned copy of the first "head_size" bytes, to reach
6312 * a page boundary.
6313 */
6314
6315 /*
6316 * Extract "head_copy" out of "copy".
6317 */
6318 head_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6319 vm_map_copy_first_entry(head_copy) =
6320 vm_map_copy_to_entry(head_copy);
6321 vm_map_copy_last_entry(head_copy) =
6322 vm_map_copy_to_entry(head_copy);
6323 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
6324 head_copy->cpy_hdr.nentries = 0;
6325 head_copy->cpy_hdr.entries_pageable =
6326 copy->cpy_hdr.entries_pageable;
6327 vm_map_store_init(&head_copy->cpy_hdr);
6328
6329 head_copy->offset = copy->offset;
6330 head_copy->size = head_size;
6331
6332 copy->offset += head_size;
6333 copy->size -= head_size;
6334
6335 entry = vm_map_copy_first_entry(copy);
6336 vm_map_copy_clip_end(copy, entry, copy->offset);
6337 vm_map_copy_entry_unlink(copy, entry);
6338 vm_map_copy_entry_link(head_copy,
6339 vm_map_copy_to_entry(head_copy),
6340 entry);
6341
6342 /*
6343 * Do the unaligned copy.
6344 */
6345 kr = vm_map_copy_overwrite_nested(dst_map,
6346 head_addr,
6347 head_copy,
6348 interruptible,
6349 (pmap_t) NULL,
6350 FALSE);
6351 if (kr != KERN_SUCCESS)
6352 goto done;
6353 }
6354
6355 if (tail_size) {
6356 /*
6357 * Extract "tail_copy" out of "copy".
6358 */
6359 tail_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6360 vm_map_copy_first_entry(tail_copy) =
6361 vm_map_copy_to_entry(tail_copy);
6362 vm_map_copy_last_entry(tail_copy) =
6363 vm_map_copy_to_entry(tail_copy);
6364 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
6365 tail_copy->cpy_hdr.nentries = 0;
6366 tail_copy->cpy_hdr.entries_pageable =
6367 copy->cpy_hdr.entries_pageable;
6368 vm_map_store_init(&tail_copy->cpy_hdr);
6369
6370 tail_copy->offset = copy->offset + copy->size - tail_size;
6371 tail_copy->size = tail_size;
6372
6373 copy->size -= tail_size;
6374
6375 entry = vm_map_copy_last_entry(copy);
6376 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
6377 entry = vm_map_copy_last_entry(copy);
6378 vm_map_copy_entry_unlink(copy, entry);
6379 vm_map_copy_entry_link(tail_copy,
6380 vm_map_copy_last_entry(tail_copy),
6381 entry);
6382 }
6383
6384 /*
6385 * Copy most (or possibly all) of the data.
6386 */
6387 kr = vm_map_copy_overwrite_nested(dst_map,
6388 dst_addr + head_size,
6389 copy,
6390 interruptible,
6391 (pmap_t) NULL,
6392 FALSE);
6393 if (kr != KERN_SUCCESS) {
6394 goto done;
6395 }
6396
6397 if (tail_size) {
6398 kr = vm_map_copy_overwrite_nested(dst_map,
6399 tail_addr,
6400 tail_copy,
6401 interruptible,
6402 (pmap_t) NULL,
6403 FALSE);
6404 }
6405
6406 done:
6407 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
6408 if (kr == KERN_SUCCESS) {
6409 /*
6410 * Discard all the copy maps.
6411 */
6412 if (head_copy) {
6413 vm_map_copy_discard(head_copy);
6414 head_copy = NULL;
6415 }
6416 vm_map_copy_discard(copy);
6417 if (tail_copy) {
6418 vm_map_copy_discard(tail_copy);
6419 tail_copy = NULL;
6420 }
6421 } else {
6422 /*
6423 * Re-assemble the original copy map.
6424 */
6425 if (head_copy) {
6426 entry = vm_map_copy_first_entry(head_copy);
6427 vm_map_copy_entry_unlink(head_copy, entry);
6428 vm_map_copy_entry_link(copy,
6429 vm_map_copy_to_entry(copy),
6430 entry);
6431 copy->offset -= head_size;
6432 copy->size += head_size;
6433 vm_map_copy_discard(head_copy);
6434 head_copy = NULL;
6435 }
6436 if (tail_copy) {
6437 entry = vm_map_copy_last_entry(tail_copy);
6438 vm_map_copy_entry_unlink(tail_copy, entry);
6439 vm_map_copy_entry_link(copy,
6440 vm_map_copy_last_entry(copy),
6441 entry);
6442 copy->size += tail_size;
6443 vm_map_copy_discard(tail_copy);
6444 tail_copy = NULL;
6445 }
6446 }
6447 return kr;
6448 }
6449
6450
6451 /*
6452 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
6453 *
6454 * Description:
6455 * Physically copy unaligned data
6456 *
6457 * Implementation:
6458 * Unaligned parts of pages have to be physically copied. We use
6459 * a modified form of vm_fault_copy (which understands non-aligned
6460 * page offsets and sizes) to do the copy. We attempt to copy as
6461 * much memory in one go as possible; however, vm_fault_copy copies
6462 * within one memory object, so we have to find the smallest of "amount
6463 * left", "source object data size" and "target object data size". With
6464 * unaligned data we don't need to split regions, therefore the source
6465 * (copy) object should be one map entry; the target range may be split
6466 * over multiple map entries, however. In any event we are pessimistic
6467 * about these assumptions.
6468 *
6469 * Assumptions:
6470 * dst_map is locked on entry and is returned locked on success,
6471 * unlocked on error.
6472 */
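/*
 * Per-iteration sizing in the loop below, shown as a minimal sketch
 * (a restatement of the code that follows, nothing new):
 *
 *	dst_size  = entry->vme_end - start;
 *	src_size  = copy_entry->vme_end -
 *		    (copy_entry->vme_start + src_offset);
 *	copy_size = (dst_size < src_size) ? dst_size : src_size;
 *	if (copy_size > amount_left)
 *		copy_size = amount_left;
 *
 * i.e. each vm_fault_copy() call moves the minimum of what is left in
 * the destination entry, what is left in the source copy entry, and
 * what is left overall.
 */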
6473
6474 static kern_return_t
6475 vm_map_copy_overwrite_unaligned(
6476 vm_map_t dst_map,
6477 vm_map_entry_t entry,
6478 vm_map_copy_t copy,
6479 vm_map_offset_t start)
6480 {
6481 vm_map_entry_t copy_entry = vm_map_copy_first_entry(copy);
6482 vm_map_version_t version;
6483 vm_object_t dst_object;
6484 vm_object_offset_t dst_offset;
6485 vm_object_offset_t src_offset;
6486 vm_object_offset_t entry_offset;
6487 vm_map_offset_t entry_end;
6488 vm_map_size_t src_size,
6489 dst_size,
6490 copy_size,
6491 amount_left;
6492 kern_return_t kr = KERN_SUCCESS;
6493
6494 vm_map_lock_write_to_read(dst_map);
6495
6496 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
6497 amount_left = copy->size;
6498 /*
6499 * unaligned, so we never clipped this entry; we need the offset into
6500 * the vm_object, not just the data.
6501 */
6502 while (amount_left > 0) {
6503
6504 if (entry == vm_map_to_entry(dst_map)) {
6505 vm_map_unlock_read(dst_map);
6506 return KERN_INVALID_ADDRESS;
6507 }
6508
6509 /* "start" must be within the current map entry */
6510 assert ((start>=entry->vme_start) && (start<entry->vme_end));
6511
6512 dst_offset = start - entry->vme_start;
6513
6514 dst_size = entry->vme_end - start;
6515
6516 src_size = copy_entry->vme_end -
6517 (copy_entry->vme_start + src_offset);
6518
6519 if (dst_size < src_size) {
6520 /*
6521 * we can only copy dst_size bytes before
6522 * we have to get the next destination entry
6523 */
6524 copy_size = dst_size;
6525 } else {
6526 /*
6527 * we can only copy src_size bytes before
6528 * we have to get the next source copy entry
6529 */
6530 copy_size = src_size;
6531 }
6532
6533 if (copy_size > amount_left) {
6534 copy_size = amount_left;
6535 }
6536 /*
6537 * Entry needs copy: create a shadow object for the
6538 * copy-on-write region.
6539 */
6540 if (entry->needs_copy &&
6541 ((entry->protection & VM_PROT_WRITE) != 0))
6542 {
6543 if (vm_map_lock_read_to_write(dst_map)) {
6544 vm_map_lock_read(dst_map);
6545 goto RetryLookup;
6546 }
6547 vm_object_shadow(&entry->object.vm_object,
6548 &entry->offset,
6549 (vm_map_size_t)(entry->vme_end
6550 - entry->vme_start));
6551 entry->needs_copy = FALSE;
6552 vm_map_lock_write_to_read(dst_map);
6553 }
6554 dst_object = entry->object.vm_object;
6555 /*
6556 * unlike with the virtual (aligned) copy, we're going
6557 * to fault on it, therefore we need a target object.
6558 */
6559 if (dst_object == VM_OBJECT_NULL) {
6560 if (vm_map_lock_read_to_write(dst_map)) {
6561 vm_map_lock_read(dst_map);
6562 goto RetryLookup;
6563 }
6564 dst_object = vm_object_allocate((vm_map_size_t)
6565 entry->vme_end - entry->vme_start);
6566 entry->object.vm_object = dst_object;
6567 entry->offset = 0;
6568 vm_map_lock_write_to_read(dst_map);
6569 }
6570 /*
6571 * Take an object reference and unlock map. The "entry" may
6572 * disappear or change when the map is unlocked.
6573 */
6574 vm_object_reference(dst_object);
6575 version.main_timestamp = dst_map->timestamp;
6576 entry_offset = entry->offset;
6577 entry_end = entry->vme_end;
6578 vm_map_unlock_read(dst_map);
6579 /*
6580 * Copy as much as possible in one pass
6581 */
6582 kr = vm_fault_copy(
6583 copy_entry->object.vm_object,
6584 copy_entry->offset + src_offset,
6585 &copy_size,
6586 dst_object,
6587 entry_offset + dst_offset,
6588 dst_map,
6589 &version,
6590 THREAD_UNINT );
6591
6592 start += copy_size;
6593 src_offset += copy_size;
6594 amount_left -= copy_size;
6595 /*
6596 * Release the object reference
6597 */
6598 vm_object_deallocate(dst_object);
6599 /*
6600 * If a hard error occurred, return it now
6601 */
6602 if (kr != KERN_SUCCESS)
6603 return kr;
6604
6605 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
6606 || amount_left == 0)
6607 {
6608 /*
6609 * all done with this copy entry, dispose.
6610 */
6611 vm_map_copy_entry_unlink(copy, copy_entry);
6612 vm_object_deallocate(copy_entry->object.vm_object);
6613 vm_map_copy_entry_dispose(copy, copy_entry);
6614
6615 if ((copy_entry = vm_map_copy_first_entry(copy))
6616 == vm_map_copy_to_entry(copy) && amount_left) {
6617 /*
6618 * not finished copying but we ran out of source
6619 */
6620 return KERN_INVALID_ADDRESS;
6621 }
6622 src_offset = 0;
6623 }
6624
6625 if (amount_left == 0)
6626 return KERN_SUCCESS;
6627
6628 vm_map_lock_read(dst_map);
6629 if (version.main_timestamp == dst_map->timestamp) {
6630 if (start == entry_end) {
6631 /*
6632 * destination region is split. Use the version
6633 * information to avoid a lookup in the normal
6634 * case.
6635 */
6636 entry = entry->vme_next;
6637 /*
6638 * should be contiguous. Fail if we encounter
6639 * a hole in the destination.
6640 */
6641 if (start != entry->vme_start) {
6642 vm_map_unlock_read(dst_map);
6643 return KERN_INVALID_ADDRESS ;
6644 }
6645 }
6646 } else {
6647 /*
6648 * Map version check failed.
6649 * we must lookup the entry because somebody
6650 * might have changed the map behind our backs.
6651 */
6652 RetryLookup:
6653 if (!vm_map_lookup_entry(dst_map, start, &entry))
6654 {
6655 vm_map_unlock_read(dst_map);
6656 return KERN_INVALID_ADDRESS ;
6657 }
6658 }
6659 }/* while */
6660
6661 return KERN_SUCCESS;
6662 }/* vm_map_copy_overwrite_unaligned */
6663
6664 /*
6665 * Routine: vm_map_copy_overwrite_aligned [internal use only]
6666 *
6667 * Description:
6668 * Does all the vm_trickery possible for whole pages.
6669 *
6670 * Implementation:
6671 *
6672 * If there are no permanent objects in the destination,
6673 * and the source and destination map entry zones match,
6674 * and the destination map entry is not shared,
6675 * then the map entries can be deleted and replaced
6676 * with those from the copy. The following code is the
6677 * basic idea of what to do, but there are lots of annoying
6678 * little details about getting protection and inheritance
6679 * right. Should add protection, inheritance, and sharing checks
6680 * to the above pass and make sure that no wiring is involved.
6681 */
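/*
 * Minimal restatement of the fast-path test used in the loop below:
 * the destination entry's object is replaced outright only when
 *
 *	(!entry->is_shared &&
 *	 (object == VM_OBJECT_NULL ||
 *	  (object->internal && !object->true_share))) ||
 *	entry->needs_copy
 *
 * holds; otherwise the code falls through to "slow_copy" and does a
 * physical vm_fault_copy() into the existing destination object.
 */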
6682
6683 int vm_map_copy_overwrite_aligned_src_not_internal = 0;
6684 int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
6685 int vm_map_copy_overwrite_aligned_src_large = 0;
6686
6687 static kern_return_t
6688 vm_map_copy_overwrite_aligned(
6689 vm_map_t dst_map,
6690 vm_map_entry_t tmp_entry,
6691 vm_map_copy_t copy,
6692 vm_map_offset_t start,
6693 __unused pmap_t pmap)
6694 {
6695 vm_object_t object;
6696 vm_map_entry_t copy_entry;
6697 vm_map_size_t copy_size;
6698 vm_map_size_t size;
6699 vm_map_entry_t entry;
6700
6701 while ((copy_entry = vm_map_copy_first_entry(copy))
6702 != vm_map_copy_to_entry(copy))
6703 {
6704 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
6705
6706 entry = tmp_entry;
6707 assert(!entry->use_pmap); /* unnested when clipped earlier */
6708 if (entry == vm_map_to_entry(dst_map)) {
6709 vm_map_unlock(dst_map);
6710 return KERN_INVALID_ADDRESS;
6711 }
6712 size = (entry->vme_end - entry->vme_start);
6713 /*
6714 * Make sure that no holes popped up in the
6715 * address map, and that the protection is
6716 * still valid, in case the map was unlocked
6717 * earlier.
6718 */
6719
6720 if ((entry->vme_start != start) || ((entry->is_sub_map)
6721 && !entry->needs_copy)) {
6722 vm_map_unlock(dst_map);
6723 return(KERN_INVALID_ADDRESS);
6724 }
6725 assert(entry != vm_map_to_entry(dst_map));
6726
6727 /*
6728 * Check protection again
6729 */
6730
6731 if ( ! (entry->protection & VM_PROT_WRITE)) {
6732 vm_map_unlock(dst_map);
6733 return(KERN_PROTECTION_FAILURE);
6734 }
6735
6736 /*
6737 * Adjust to source size first
6738 */
6739
6740 if (copy_size < size) {
6741 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
6742 size = copy_size;
6743 }
6744
6745 /*
6746 * Adjust to destination size
6747 */
6748
6749 if (size < copy_size) {
6750 vm_map_copy_clip_end(copy, copy_entry,
6751 copy_entry->vme_start + size);
6752 copy_size = size;
6753 }
6754
6755 assert((entry->vme_end - entry->vme_start) == size);
6756 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
6757 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
6758
6759 /*
6760 * If the destination contains temporary unshared memory,
6761 * we can perform the copy by throwing it away and
6762 * installing the source data.
6763 */
6764
6765 object = entry->object.vm_object;
6766 if ((!entry->is_shared &&
6767 ((object == VM_OBJECT_NULL) ||
6768 (object->internal && !object->true_share))) ||
6769 entry->needs_copy) {
6770 vm_object_t old_object = entry->object.vm_object;
6771 vm_object_offset_t old_offset = entry->offset;
6772 vm_object_offset_t offset;
6773
6774 /*
6775 * Ensure that the source and destination aren't
6776 * identical
6777 */
6778 if (old_object == copy_entry->object.vm_object &&
6779 old_offset == copy_entry->offset) {
6780 vm_map_copy_entry_unlink(copy, copy_entry);
6781 vm_map_copy_entry_dispose(copy, copy_entry);
6782
6783 if (old_object != VM_OBJECT_NULL)
6784 vm_object_deallocate(old_object);
6785
6786 start = tmp_entry->vme_end;
6787 tmp_entry = tmp_entry->vme_next;
6788 continue;
6789 }
6790
6791 #if !CONFIG_EMBEDDED
6792 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
6793 #define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
6794 if (copy_entry->object.vm_object != VM_OBJECT_NULL &&
6795 copy_entry->object.vm_object->vo_size >= __TRADEOFF1_OBJ_SIZE &&
6796 copy_size <= __TRADEOFF1_COPY_SIZE) {
6797 /*
6798 * Virtual vs. Physical copy tradeoff #1.
6799 *
6800 * Copying only a few pages out of a large
6801 * object: do a physical copy instead of
6802 * a virtual copy, to avoid possibly keeping
6803 * the entire large object alive because of
6804 * those few copy-on-write pages.
6805 */
6806 vm_map_copy_overwrite_aligned_src_large++;
6807 goto slow_copy;
6808 }
6809 #endif /* !CONFIG_EMBEDDED */
6810
6811 if (entry->alias >= VM_MEMORY_MALLOC &&
6812 entry->alias <= VM_MEMORY_MALLOC_LARGE_REUSED) {
6813 vm_object_t new_object, new_shadow;
6814
6815 /*
6816 * We're about to map something over a mapping
6817 * established by malloc()...
6818 */
6819 new_object = copy_entry->object.vm_object;
6820 if (new_object != VM_OBJECT_NULL) {
6821 vm_object_lock_shared(new_object);
6822 }
6823 while (new_object != VM_OBJECT_NULL &&
6824 #if !CONFIG_EMBEDDED
6825 !new_object->true_share &&
6826 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
6827 #endif /* !CONFIG_EMBEDDED */
6828 new_object->internal) {
6829 new_shadow = new_object->shadow;
6830 if (new_shadow == VM_OBJECT_NULL) {
6831 break;
6832 }
6833 vm_object_lock_shared(new_shadow);
6834 vm_object_unlock(new_object);
6835 new_object = new_shadow;
6836 }
6837 if (new_object != VM_OBJECT_NULL) {
6838 if (!new_object->internal) {
6839 /*
6840 * The new mapping is backed
6841 * by an external object. We
6842 * don't want malloc'ed memory
6843 * to be replaced with such a
6844 * non-anonymous mapping, so
6845 * let's go off the optimized
6846 * path...
6847 */
6848 vm_map_copy_overwrite_aligned_src_not_internal++;
6849 vm_object_unlock(new_object);
6850 goto slow_copy;
6851 }
6852 #if !CONFIG_EMBEDDED
6853 if (new_object->true_share ||
6854 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
6855 /*
6856 * Same if there's a "true_share"
6857 * object in the shadow chain, or
6858 * an object with a non-default
6859 * (SYMMETRIC) copy strategy.
6860 */
6861 vm_map_copy_overwrite_aligned_src_not_symmetric++;
6862 vm_object_unlock(new_object);
6863 goto slow_copy;
6864 }
6865 #endif /* !CONFIG_EMBEDDED */
6866 vm_object_unlock(new_object);
6867 }
6868 /*
6869 * The new mapping is still backed by
6870 * anonymous (internal) memory, so it's
6871 * OK to substitute it for the original
6872 * malloc() mapping.
6873 */
6874 }
6875
6876 if (old_object != VM_OBJECT_NULL) {
6877 if(entry->is_sub_map) {
6878 if(entry->use_pmap) {
6879 #ifndef NO_NESTED_PMAP
6880 pmap_unnest(dst_map->pmap,
6881 (addr64_t)entry->vme_start,
6882 entry->vme_end - entry->vme_start);
6883 #endif /* NO_NESTED_PMAP */
6884 if(dst_map->mapped_in_other_pmaps) {
6885 /* clean up parent */
6886 /* map/maps */
6887 vm_map_submap_pmap_clean(
6888 dst_map, entry->vme_start,
6889 entry->vme_end,
6890 entry->object.sub_map,
6891 entry->offset);
6892 }
6893 } else {
6894 vm_map_submap_pmap_clean(
6895 dst_map, entry->vme_start,
6896 entry->vme_end,
6897 entry->object.sub_map,
6898 entry->offset);
6899 }
6900 vm_map_deallocate(
6901 entry->object.sub_map);
6902 } else {
6903 if(dst_map->mapped_in_other_pmaps) {
6904 vm_object_pmap_protect(
6905 entry->object.vm_object,
6906 entry->offset,
6907 entry->vme_end
6908 - entry->vme_start,
6909 PMAP_NULL,
6910 entry->vme_start,
6911 VM_PROT_NONE);
6912 } else {
6913 pmap_remove(dst_map->pmap,
6914 (addr64_t)(entry->vme_start),
6915 (addr64_t)(entry->vme_end));
6916 }
6917 vm_object_deallocate(old_object);
6918 }
6919 }
6920
6921 entry->is_sub_map = FALSE;
6922 entry->object = copy_entry->object;
6923 object = entry->object.vm_object;
6924 entry->needs_copy = copy_entry->needs_copy;
6925 entry->wired_count = 0;
6926 entry->user_wired_count = 0;
6927 offset = entry->offset = copy_entry->offset;
6928
6929 vm_map_copy_entry_unlink(copy, copy_entry);
6930 vm_map_copy_entry_dispose(copy, copy_entry);
6931
6932 /*
6933 * we could try to push pages into the pmap at this point, BUT
6934 * this optimization only saved on average 2 us per page if ALL
6935 * the pages in the source were currently mapped
6936 * and ALL the pages in the dest were touched. If fewer
6937 * than 2/3 of the pages were touched, this optimization actually cost more cycles.
6938 * It also puts a lot of pressure on the pmap layer w.r.t. mapping structures.
6939 */
6940
6941 /*
6942 * Set up for the next iteration. The map
6943 * has not been unlocked, so the next
6944 * address should be at the end of this
6945 * entry, and the next map entry should be
6946 * the one following it.
6947 */
6948
6949 start = tmp_entry->vme_end;
6950 tmp_entry = tmp_entry->vme_next;
6951 } else {
6952 vm_map_version_t version;
6953 vm_object_t dst_object;
6954 vm_object_offset_t dst_offset;
6955 kern_return_t r;
6956
6957 slow_copy:
6958 if (entry->needs_copy) {
6959 vm_object_shadow(&entry->object.vm_object,
6960 &entry->offset,
6961 (entry->vme_end -
6962 entry->vme_start));
6963 entry->needs_copy = FALSE;
6964 }
6965
6966 dst_object = entry->object.vm_object;
6967 dst_offset = entry->offset;
6968
6969 /*
6970 * Take an object reference, and record
6971 * the map version information so that the
6972 * map can be safely unlocked.
6973 */
6974
6975 if (dst_object == VM_OBJECT_NULL) {
6976 /*
6977 * We would usually have just taken the
6978 * optimized path above if the destination
6979 * object has not been allocated yet. But we
6980 * now disable that optimization if the copy
6981 * entry's object is not backed by anonymous
6982 * memory to avoid replacing malloc'ed
6983 * (i.e. re-usable) anonymous memory with a
6984 * not-so-anonymous mapping.
6985 * So we have to handle this case here and
6986 * allocate a new VM object for this map entry.
6987 */
6988 dst_object = vm_object_allocate(
6989 entry->vme_end - entry->vme_start);
6990 dst_offset = 0;
6991 entry->object.vm_object = dst_object;
6992 entry->offset = dst_offset;
6993
6994 }
6995
6996 vm_object_reference(dst_object);
6997
6998 /* account for unlock bumping up timestamp */
6999 version.main_timestamp = dst_map->timestamp + 1;
7000
7001 vm_map_unlock(dst_map);
7002
7003 /*
7004 * Copy as much as possible in one pass
7005 */
7006
7007 copy_size = size;
7008 r = vm_fault_copy(
7009 copy_entry->object.vm_object,
7010 copy_entry->offset,
7011 &copy_size,
7012 dst_object,
7013 dst_offset,
7014 dst_map,
7015 &version,
7016 THREAD_UNINT );
7017
7018 /*
7019 * Release the object reference
7020 */
7021
7022 vm_object_deallocate(dst_object);
7023
7024 /*
7025 * If a hard error occurred, return it now
7026 */
7027
7028 if (r != KERN_SUCCESS)
7029 return(r);
7030
7031 if (copy_size != 0) {
7032 /*
7033 * Dispose of the copied region
7034 */
7035
7036 vm_map_copy_clip_end(copy, copy_entry,
7037 copy_entry->vme_start + copy_size);
7038 vm_map_copy_entry_unlink(copy, copy_entry);
7039 vm_object_deallocate(copy_entry->object.vm_object);
7040 vm_map_copy_entry_dispose(copy, copy_entry);
7041 }
7042
7043 /*
7044 * Pick up in the destination map where we left off.
7045 *
7046 * Use the version information to avoid a lookup
7047 * in the normal case.
7048 */
7049
7050 start += copy_size;
7051 vm_map_lock(dst_map);
7052 if (version.main_timestamp == dst_map->timestamp &&
7053 copy_size != 0) {
7054 /* We can safely use saved tmp_entry value */
7055
7056 vm_map_clip_end(dst_map, tmp_entry, start);
7057 tmp_entry = tmp_entry->vme_next;
7058 } else {
7059 /* Must do lookup of tmp_entry */
7060
7061 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
7062 vm_map_unlock(dst_map);
7063 return(KERN_INVALID_ADDRESS);
7064 }
7065 vm_map_clip_start(dst_map, tmp_entry, start);
7066 }
7067 }
7068 }/* while */
7069
7070 return(KERN_SUCCESS);
7071 }/* vm_map_copy_overwrite_aligned */
7072
7073 /*
7074 * Routine: vm_map_copyin_kernel_buffer [internal use only]
7075 *
7076 * Description:
7077 * Copy in data to a kernel buffer from space in the
7078 * source map. The original space may be optionally
7079 * deallocated.
7080 *
7081 * If successful, returns a new copy object.
7082 */
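/*
 * Layout of the resulting copy object (illustrative):
 *
 *	+-----------------------------+ <- copy
 *	| struct vm_map_copy header   |    (type = VM_MAP_COPY_KERNEL_BUFFER)
 *	+-----------------------------+ <- copy->cpy_kdata == (void *)(copy + 1)
 *	| "len" bytes of copied data  |
 *	+-----------------------------+
 *
 * i.e. header and data come from a single kalloc() of
 * sizeof(struct vm_map_copy) + len bytes.
 */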
7083 static kern_return_t
7084 vm_map_copyin_kernel_buffer(
7085 vm_map_t src_map,
7086 vm_map_offset_t src_addr,
7087 vm_map_size_t len,
7088 boolean_t src_destroy,
7089 vm_map_copy_t *copy_result)
7090 {
7091 kern_return_t kr;
7092 vm_map_copy_t copy;
7093 vm_size_t kalloc_size;
7094
7095 if ((vm_size_t) len != len) {
7096 /* "len" is too big and doesn't fit in a "vm_size_t" */
7097 return KERN_RESOURCE_SHORTAGE;
7098 }
7099 kalloc_size = (vm_size_t) (sizeof(struct vm_map_copy) + len);
7100 assert((vm_map_size_t) kalloc_size == sizeof (struct vm_map_copy) + len);
7101
7102 copy = (vm_map_copy_t) kalloc(kalloc_size);
7103 if (copy == VM_MAP_COPY_NULL) {
7104 return KERN_RESOURCE_SHORTAGE;
7105 }
7106 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
7107 copy->size = len;
7108 copy->offset = 0;
7109 copy->cpy_kdata = (void *) (copy + 1);
7110 copy->cpy_kalloc_size = kalloc_size;
7111
7112 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t) len);
7113 if (kr != KERN_SUCCESS) {
7114 kfree(copy, kalloc_size);
7115 return kr;
7116 }
7117 if (src_destroy) {
7118 (void) vm_map_remove(src_map, vm_map_trunc_page(src_addr),
7119 vm_map_round_page(src_addr + len),
7120 VM_MAP_REMOVE_INTERRUPTIBLE |
7121 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
7122 (src_map == kernel_map) ?
7123 VM_MAP_REMOVE_KUNWIRE : 0);
7124 }
7125 *copy_result = copy;
7126 return KERN_SUCCESS;
7127 }
7128
7129 /*
7130 * Routine: vm_map_copyout_kernel_buffer [internal use only]
7131 *
7132 * Description:
7133 * Copy out data from a kernel buffer into space in the
7134 * destination map. The space may be optionally dynamically
7135 * allocated.
7136 *
7137 * If successful, consumes the copy object.
7138 * Otherwise, the caller is responsible for it.
7139 */
7140 static int vm_map_copyout_kernel_buffer_failures = 0;
7141 static kern_return_t
7142 vm_map_copyout_kernel_buffer(
7143 vm_map_t map,
7144 vm_map_address_t *addr, /* IN/OUT */
7145 vm_map_copy_t copy,
7146 boolean_t overwrite)
7147 {
7148 kern_return_t kr = KERN_SUCCESS;
7149 thread_t thread = current_thread();
7150
7151 if (!overwrite) {
7152
7153 /*
7154 * Allocate space in the target map for the data
7155 */
7156 *addr = 0;
7157 kr = vm_map_enter(map,
7158 addr,
7159 vm_map_round_page(copy->size),
7160 (vm_map_offset_t) 0,
7161 VM_FLAGS_ANYWHERE,
7162 VM_OBJECT_NULL,
7163 (vm_object_offset_t) 0,
7164 FALSE,
7165 VM_PROT_DEFAULT,
7166 VM_PROT_ALL,
7167 VM_INHERIT_DEFAULT);
7168 if (kr != KERN_SUCCESS)
7169 return kr;
7170 }
7171
7172 /*
7173 * Copyout the data from the kernel buffer to the target map.
7174 */
7175 if (thread->map == map) {
7176
7177 /*
7178 * If the target map is the current map, just do
7179 * the copy.
7180 */
7181 assert((vm_size_t) copy->size == copy->size);
7182 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
7183 kr = KERN_INVALID_ADDRESS;
7184 }
7185 }
7186 else {
7187 vm_map_t oldmap;
7188
7189 /*
7190 * If the target map is another map, assume the
7191 * target's address space identity for the duration
7192 * of the copy.
7193 */
7194 vm_map_reference(map);
7195 oldmap = vm_map_switch(map);
7196
7197 assert((vm_size_t) copy->size == copy->size);
7198 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
7199 vm_map_copyout_kernel_buffer_failures++;
7200 kr = KERN_INVALID_ADDRESS;
7201 }
7202
7203 (void) vm_map_switch(oldmap);
7204 vm_map_deallocate(map);
7205 }
7206
7207 if (kr != KERN_SUCCESS) {
7208 /* the copy failed, clean up */
7209 if (!overwrite) {
7210 /*
7211 * Deallocate the space we allocated in the target map.
7212 */
7213 (void) vm_map_remove(map,
7214 vm_map_trunc_page(*addr),
7215 vm_map_round_page(*addr +
7216 vm_map_round_page(copy->size)),
7217 VM_MAP_NO_FLAGS);
7218 *addr = 0;
7219 }
7220 } else {
7221 /* copy was successful, discard the copy structure */
7222 kfree(copy, copy->cpy_kalloc_size);
7223 }
7224
7225 return kr;
7226 }
7227
7228 /*
7229 * Macro: vm_map_copy_insert
7230 *
7231 * Description:
7232 * Link a copy chain ("copy") into a map at the
7233 * specified location (after "where").
7234 * Side effects:
7235 * The copy chain is destroyed.
7236 * Warning:
7237 * The arguments are evaluated multiple times.
7238 */
7239 #define vm_map_copy_insert(map, where, copy) \
7240 MACRO_BEGIN \
7241 vm_map_store_copy_insert(map, where, copy); \
7242 zfree(vm_map_copy_zone, copy); \
7243 MACRO_END
7244
7245 /*
7246 * Routine: vm_map_copyout
7247 *
7248 * Description:
7249 * Copy out a copy chain ("copy") into newly-allocated
7250 * space in the destination map.
7251 *
7252 * If successful, consumes the copy object.
7253 * Otherwise, the caller is responsible for it.
7254 */
7255 kern_return_t
7256 vm_map_copyout(
7257 vm_map_t dst_map,
7258 vm_map_address_t *dst_addr, /* OUT */
7259 vm_map_copy_t copy)
7260 {
7261 vm_map_size_t size;
7262 vm_map_size_t adjustment;
7263 vm_map_offset_t start;
7264 vm_object_offset_t vm_copy_start;
7265 vm_map_entry_t last;
7266 register
7267 vm_map_entry_t entry;
7268
7269 /*
7270 * Check for null copy object.
7271 */
7272
7273 if (copy == VM_MAP_COPY_NULL) {
7274 *dst_addr = 0;
7275 return(KERN_SUCCESS);
7276 }
7277
7278 /*
7279 * Check for special copy object, created
7280 * by vm_map_copyin_object.
7281 */
7282
7283 if (copy->type == VM_MAP_COPY_OBJECT) {
7284 vm_object_t object = copy->cpy_object;
7285 kern_return_t kr;
7286 vm_object_offset_t offset;
7287
7288 offset = vm_object_trunc_page(copy->offset);
7289 size = vm_map_round_page(copy->size +
7290 (vm_map_size_t)(copy->offset - offset));
7291 *dst_addr = 0;
7292 kr = vm_map_enter(dst_map, dst_addr, size,
7293 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
7294 object, offset, FALSE,
7295 VM_PROT_DEFAULT, VM_PROT_ALL,
7296 VM_INHERIT_DEFAULT);
7297 if (kr != KERN_SUCCESS)
7298 return(kr);
7299 /* Account for non-pagealigned copy object */
7300 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
7301 zfree(vm_map_copy_zone, copy);
7302 return(KERN_SUCCESS);
7303 }
7304
7305 /*
7306 * Check for special kernel buffer allocated
7307 * by new_ipc_kmsg_copyin.
7308 */
7309
7310 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
7311 return(vm_map_copyout_kernel_buffer(dst_map, dst_addr,
7312 copy, FALSE));
7313 }
7314
7315 /*
7316 * Find space for the data
7317 */
7318
7319 vm_copy_start = vm_object_trunc_page(copy->offset);
7320 size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size)
7321 - vm_copy_start;
7322
7323 StartAgain: ;
7324
7325 vm_map_lock(dst_map);
7326 if( dst_map->disable_vmentry_reuse == TRUE) {
7327 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
7328 last = entry;
7329 } else {
7330 assert(first_free_is_valid(dst_map));
7331 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
7332 vm_map_min(dst_map) : last->vme_end;
7333 }
7334
7335 while (TRUE) {
7336 vm_map_entry_t next = last->vme_next;
7337 vm_map_offset_t end = start + size;
7338
7339 if ((end > dst_map->max_offset) || (end < start)) {
7340 if (dst_map->wait_for_space) {
7341 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
7342 assert_wait((event_t) dst_map,
7343 THREAD_INTERRUPTIBLE);
7344 vm_map_unlock(dst_map);
7345 thread_block(THREAD_CONTINUE_NULL);
7346 goto StartAgain;
7347 }
7348 }
7349 vm_map_unlock(dst_map);
7350 return(KERN_NO_SPACE);
7351 }
7352
7353 if ((next == vm_map_to_entry(dst_map)) ||
7354 (next->vme_start >= end))
7355 break;
7356
7357 last = next;
7358 start = last->vme_end;
7359 }
7360
7361 /*
7362 * Since we're going to just drop the map
7363 * entries from the copy into the destination
7364 * map, they must come from the same pool.
7365 */
7366
7367 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
7368 /*
7369 * Mismatches occur when dealing with the default
7370 * pager.
7371 */
7372 zone_t old_zone;
7373 vm_map_entry_t next, new;
7374
7375 /*
7376 * Find the zone that the copies were allocated from
7377 */
7378
7379 entry = vm_map_copy_first_entry(copy);
7380
7381 /*
7382 * Reinitialize the copy so that vm_map_copy_entry_link
7383 * will work.
7384 */
7385 vm_map_store_copy_reset(copy, entry);
7386 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
7387
7388 /*
7389 * Copy each entry.
7390 */
7391 while (entry != vm_map_copy_to_entry(copy)) {
7392 new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
7393 vm_map_entry_copy_full(new, entry);
7394 new->use_pmap = FALSE; /* clr address space specifics */
7395 vm_map_copy_entry_link(copy,
7396 vm_map_copy_last_entry(copy),
7397 new);
7398 next = entry->vme_next;
7399 old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
7400 zfree(old_zone, entry);
7401 entry = next;
7402 }
7403 }
7404
7405 /*
7406 * Adjust the addresses in the copy chain, and
7407 * reset the region attributes.
7408 */
7409
7410 adjustment = start - vm_copy_start;
7411 for (entry = vm_map_copy_first_entry(copy);
7412 entry != vm_map_copy_to_entry(copy);
7413 entry = entry->vme_next) {
7414 entry->vme_start += adjustment;
7415 entry->vme_end += adjustment;
7416
7417 entry->inheritance = VM_INHERIT_DEFAULT;
7418 entry->protection = VM_PROT_DEFAULT;
7419 entry->max_protection = VM_PROT_ALL;
7420 entry->behavior = VM_BEHAVIOR_DEFAULT;
7421
7422 /*
7423 * If the entry is now wired,
7424 * map the pages into the destination map.
7425 */
7426 if (entry->wired_count != 0) {
7427 register vm_map_offset_t va;
7428 vm_object_offset_t offset;
7429 register vm_object_t object;
7430 vm_prot_t prot;
7431 int type_of_fault;
7432
7433 object = entry->object.vm_object;
7434 offset = entry->offset;
7435 va = entry->vme_start;
7436
7437 pmap_pageable(dst_map->pmap,
7438 entry->vme_start,
7439 entry->vme_end,
7440 TRUE);
7441
7442 while (va < entry->vme_end) {
7443 register vm_page_t m;
7444
7445 /*
7446 * Look up the page in the object.
7447 * Assert that the page will be found in the
7448 * top object:
7449 * either
7450 * the object was newly created by
7451 * vm_object_copy_slowly, and has
7452 * copies of all of the pages from
7453 * the source object
7454 * or
7455 * the object was moved from the old
7456 * map entry; because the old map
7457 * entry was wired, all of the pages
7458 * were in the top-level object.
7459 * (XXX not true if we wire pages for
7460 * reading)
7461 */
7462 vm_object_lock(object);
7463
7464 m = vm_page_lookup(object, offset);
7465 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
7466 m->absent)
7467 panic("vm_map_copyout: wiring %p", m);
7468
7469 /*
7470 * ENCRYPTED SWAP:
7471 * The page is assumed to be wired here, so it
7472 * shouldn't be encrypted. Otherwise, we
7473 * couldn't enter it in the page table, since
7474 * we don't want the user to see the encrypted
7475 * data.
7476 */
7477 ASSERT_PAGE_DECRYPTED(m);
7478
7479 prot = entry->protection;
7480
7481 if (override_nx(dst_map, entry->alias) && prot)
7482 prot |= VM_PROT_EXECUTE;
7483
7484 type_of_fault = DBG_CACHE_HIT_FAULT;
7485
7486 vm_fault_enter(m, dst_map->pmap, va, prot, prot,
7487 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, NULL,
7488 &type_of_fault);
7489
7490 vm_object_unlock(object);
7491
7492 offset += PAGE_SIZE_64;
7493 va += PAGE_SIZE;
7494 }
7495 }
7496 }
7497
7498 /*
7499 * Correct the page alignment for the result
7500 */
7501
7502 *dst_addr = start + (copy->offset - vm_copy_start);
7503
7504 /*
7505 * Update the hints and the map size
7506 */
7507
7508 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
7509
7510 dst_map->size += size;
7511
7512 /*
7513 * Link in the copy
7514 */
7515
7516 vm_map_copy_insert(dst_map, last, copy);
7517
7518 vm_map_unlock(dst_map);
7519
7520 /*
7521 * XXX If wiring_required, call vm_map_pageable
7522 */
7523
7524 return(KERN_SUCCESS);
7525 }
7526
7527 /*
7528 * Routine: vm_map_copyin
7529 *
7530 * Description:
7531 * see vm_map_copyin_common. Exported via Unsupported.exports.
7532 *
7533 */
7534
7535 #undef vm_map_copyin
7536
7537 kern_return_t
7538 vm_map_copyin(
7539 vm_map_t src_map,
7540 vm_map_address_t src_addr,
7541 vm_map_size_t len,
7542 boolean_t src_destroy,
7543 vm_map_copy_t *copy_result) /* OUT */
7544 {
7545 return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
7546 FALSE, copy_result, FALSE));
7547 }
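/*
 * Typical pairing of vm_map_copyin() and vm_map_copyout(), as a
 * minimal sketch ("src_task_map", "dst_task_map", "src_addr" and
 * "len" are assumed to be supplied by the caller):
 *
 *	vm_map_copy_t		copy;
 *	vm_map_address_t	dst_addr;
 *	kern_return_t		kr;
 *
 *	kr = vm_map_copyin(src_task_map, src_addr, len, FALSE, &copy);
 *	if (kr != KERN_SUCCESS)
 *		return kr;
 *	kr = vm_map_copyout(dst_task_map, &dst_addr, copy);
 *	if (kr != KERN_SUCCESS)
 *		vm_map_copy_discard(copy);
 *
 * On success, vm_map_copyout() consumes "copy"; on failure the caller
 * still owns it and must discard it.
 */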
7548
7549 /*
7550 * Routine: vm_map_copyin_common
7551 *
7552 * Description:
7553 * Copy the specified region (src_addr, len) from the
7554 * source address space (src_map), possibly removing
7555 * the region from the source address space (src_destroy).
7556 *
7557 * Returns:
7558 * A vm_map_copy_t object (copy_result), suitable for
7559 * insertion into another address space (using vm_map_copyout),
7560 * copying over another address space region (using
7561 * vm_map_copy_overwrite). If the copy is unused, it
7562 * should be destroyed (using vm_map_copy_discard).
7563 *
7564 * In/out conditions:
7565 * The source map should not be locked on entry.
7566 */
7567
7568 typedef struct submap_map {
7569 vm_map_t parent_map;
7570 vm_map_offset_t base_start;
7571 vm_map_offset_t base_end;
7572 vm_map_size_t base_len;
7573 struct submap_map *next;
7574 } submap_map_t;
7575
7576 kern_return_t
7577 vm_map_copyin_common(
7578 vm_map_t src_map,
7579 vm_map_address_t src_addr,
7580 vm_map_size_t len,
7581 boolean_t src_destroy,
7582 __unused boolean_t src_volatile,
7583 vm_map_copy_t *copy_result, /* OUT */
7584 boolean_t use_maxprot)
7585 {
7586 vm_map_entry_t tmp_entry; /* Result of last map lookup --
7587 * in multi-level lookup, this
7588 * entry contains the actual
7589 * vm_object/offset.
7590 */
7591 register
7592 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
7593
7594 vm_map_offset_t src_start; /* Start of current entry --
7595 * where copy is taking place now
7596 */
7597 vm_map_offset_t src_end; /* End of entire region to be
7598 * copied */
7599 vm_map_offset_t src_base;
7600 vm_map_t base_map = src_map;
7601 boolean_t map_share=FALSE;
7602 submap_map_t *parent_maps = NULL;
7603
7604 register
7605 vm_map_copy_t copy; /* Resulting copy */
7606 vm_map_address_t copy_addr;
7607
7608 /*
7609 * Check for copies of zero bytes.
7610 */
7611
7612 if (len == 0) {
7613 *copy_result = VM_MAP_COPY_NULL;
7614 return(KERN_SUCCESS);
7615 }
7616
7617 /*
7618 * Check that the end address doesn't overflow
7619 */
7620 src_end = src_addr + len;
7621 if (src_end < src_addr)
7622 return KERN_INVALID_ADDRESS;
7623
7624 /*
7625 * If the copy is sufficiently small, use a kernel buffer instead
7626 * of making a virtual copy. The theory being that the cost of
7627 * setting up VM (and taking C-O-W faults) dominates the copy costs
7628 * for small regions.
7629 */
7630 if ((len < msg_ool_size_small) && !use_maxprot)
7631 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
7632 src_destroy, copy_result);
7633
7634 /*
7635 * Compute (page aligned) start and end of region
7636 */
7637 src_start = vm_map_trunc_page(src_addr);
7638 src_end = vm_map_round_page(src_end);
7639
7640 XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);
7641
7642 /*
7643 * Allocate a header element for the list.
7644 *
7645 * Use the start and end in the header to
7646 * remember the endpoints prior to rounding.
7647 */
7648
7649 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7650 vm_map_copy_first_entry(copy) =
7651 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
7652 copy->type = VM_MAP_COPY_ENTRY_LIST;
7653 copy->cpy_hdr.nentries = 0;
7654 copy->cpy_hdr.entries_pageable = TRUE;
7655
7656 vm_map_store_init( &(copy->cpy_hdr) );
7657
7658 copy->offset = src_addr;
7659 copy->size = len;
7660
7661 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
7662
7663 #define RETURN(x) \
7664 MACRO_BEGIN \
7665 vm_map_unlock(src_map); \
7666 if(src_map != base_map) \
7667 vm_map_deallocate(src_map); \
7668 if (new_entry != VM_MAP_ENTRY_NULL) \
7669 vm_map_copy_entry_dispose(copy,new_entry); \
7670 vm_map_copy_discard(copy); \
7671 { \
7672 submap_map_t *_ptr; \
7673 \
7674 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
7675 parent_maps=parent_maps->next; \
7676 if (_ptr->parent_map != base_map) \
7677 vm_map_deallocate(_ptr->parent_map); \
7678 kfree(_ptr, sizeof(submap_map_t)); \
7679 } \
7680 } \
7681 MACRO_RETURN(x); \
7682 MACRO_END
7683
7684 /*
7685 * Find the beginning of the region.
7686 */
7687
7688 vm_map_lock(src_map);
7689
7690 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry))
7691 RETURN(KERN_INVALID_ADDRESS);
7692 if(!tmp_entry->is_sub_map) {
7693 vm_map_clip_start(src_map, tmp_entry, src_start);
7694 }
7695 /* set for later submap fix-up */
7696 copy_addr = src_start;
7697
7698 /*
7699 * Go through entries until we get to the end.
7700 */
7701
7702 while (TRUE) {
7703 register
7704 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
7705 vm_map_size_t src_size; /* Size of source
7706 * map entry (in both
7707 * maps)
7708 */
7709
7710 register
7711 vm_object_t src_object; /* Object to copy */
7712 vm_object_offset_t src_offset;
7713
7714 boolean_t src_needs_copy; /* Should source map
7715 * be made read-only
7716 * for copy-on-write?
7717 */
7718
7719 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
7720
7721 boolean_t was_wired; /* Was source wired? */
7722 vm_map_version_t version; /* Version before locks
7723 * dropped to make copy
7724 */
7725 kern_return_t result; /* Return value from
7726 * copy_strategically.
7727 */
7728 while(tmp_entry->is_sub_map) {
7729 vm_map_size_t submap_len;
7730 submap_map_t *ptr;
7731
7732 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
7733 ptr->next = parent_maps;
7734 parent_maps = ptr;
7735 ptr->parent_map = src_map;
7736 ptr->base_start = src_start;
7737 ptr->base_end = src_end;
7738 submap_len = tmp_entry->vme_end - src_start;
7739 if(submap_len > (src_end-src_start))
7740 submap_len = src_end-src_start;
7741 ptr->base_len = submap_len;
7742
7743 src_start -= tmp_entry->vme_start;
7744 src_start += tmp_entry->offset;
7745 src_end = src_start + submap_len;
7746 src_map = tmp_entry->object.sub_map;
7747 vm_map_lock(src_map);
7748 /* keep an outstanding reference for all maps in */
7749 /* the tree of parent maps, except the base map */
7750 vm_map_reference(src_map);
7751 vm_map_unlock(ptr->parent_map);
7752 if (!vm_map_lookup_entry(
7753 src_map, src_start, &tmp_entry))
7754 RETURN(KERN_INVALID_ADDRESS);
7755 map_share = TRUE;
7756 if(!tmp_entry->is_sub_map)
7757 vm_map_clip_start(src_map, tmp_entry, src_start);
7758 src_entry = tmp_entry;
7759 }
7760 /* we are now in the lowest level submap... */
7761
7762 if ((tmp_entry->object.vm_object != VM_OBJECT_NULL) &&
7763 (tmp_entry->object.vm_object->phys_contiguous)) {
7764 /* This is not supported for now. In the future */
7765 /* we will need to detect the phys_contig */
7766 /* condition and then upgrade copy_slowly */
7767 /* to do a physical copy from the device-memory- */
7768 /* based object. We can piggy-back off of */
7769 /* the was_wired boolean to set up the */
7770 /* proper handling. */
7771 RETURN(KERN_PROTECTION_FAILURE);
7772 }
7773 /*
7774 * Create a new address map entry to hold the result.
7775 * Fill in the fields from the appropriate source entries.
7776 * We must unlock the source map to do this if we need
7777 * to allocate a map entry.
7778 */
7779 if (new_entry == VM_MAP_ENTRY_NULL) {
7780 version.main_timestamp = src_map->timestamp;
7781 vm_map_unlock(src_map);
7782
7783 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
7784
7785 vm_map_lock(src_map);
7786 if ((version.main_timestamp + 1) != src_map->timestamp) {
7787 if (!vm_map_lookup_entry(src_map, src_start,
7788 &tmp_entry)) {
7789 RETURN(KERN_INVALID_ADDRESS);
7790 }
7791 if (!tmp_entry->is_sub_map)
7792 vm_map_clip_start(src_map, tmp_entry, src_start);
7793 continue; /* restart w/ new tmp_entry */
7794 }
7795 }
7796
7797 /*
7798 * Verify that the region can be read.
7799 */
7800 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
7801 !use_maxprot) ||
7802 (src_entry->max_protection & VM_PROT_READ) == 0)
7803 RETURN(KERN_PROTECTION_FAILURE);
7804
7805 /*
7806 * Clip against the endpoints of the entire region.
7807 */
7808
7809 vm_map_clip_end(src_map, src_entry, src_end);
7810
7811 src_size = src_entry->vme_end - src_start;
7812 src_object = src_entry->object.vm_object;
7813 src_offset = src_entry->offset;
7814 was_wired = (src_entry->wired_count != 0);
7815
7816 vm_map_entry_copy(new_entry, src_entry);
7817 new_entry->use_pmap = FALSE; /* clr address space specifics */
7818
7819 /*
7820 * Attempt non-blocking copy-on-write optimizations.
7821 */
7822
7823 if (src_destroy &&
7824 (src_object == VM_OBJECT_NULL ||
7825 (src_object->internal && !src_object->true_share
7826 && !map_share))) {
7827 /*
7828 * If we are destroying the source, and the object
7829 * is internal, we can move the object reference
7830 * from the source to the copy. The copy is
7831 * copy-on-write only if the source is.
7832 * We make another reference to the object, because
7833 * destroying the source entry will deallocate it.
7834 */
7835 vm_object_reference(src_object);
7836
7837 /*
7838 * Copy is always unwired; vm_map_copy_entry
7839 * sets its wired count to zero.
7840 */
7841
7842 goto CopySuccessful;
7843 }
7844
7845
7846 RestartCopy:
7847 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
7848 src_object, new_entry, new_entry->object.vm_object,
7849 was_wired, 0);
7850 if ((src_object == VM_OBJECT_NULL ||
7851 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
7852 vm_object_copy_quickly(
7853 &new_entry->object.vm_object,
7854 src_offset,
7855 src_size,
7856 &src_needs_copy,
7857 &new_entry_needs_copy)) {
7858
7859 new_entry->needs_copy = new_entry_needs_copy;
7860
7861 /*
7862 * Handle copy-on-write obligations
7863 */
7864
7865 if (src_needs_copy && !tmp_entry->needs_copy) {
7866 vm_prot_t prot;
7867
7868 prot = src_entry->protection & ~VM_PROT_WRITE;
7869
7870 if (override_nx(src_map, src_entry->alias) && prot)
7871 prot |= VM_PROT_EXECUTE;
7872
7873 vm_object_pmap_protect(
7874 src_object,
7875 src_offset,
7876 src_size,
7877 (src_entry->is_shared ?
7878 PMAP_NULL
7879 : src_map->pmap),
7880 src_entry->vme_start,
7881 prot);
7882
7883 tmp_entry->needs_copy = TRUE;
7884 }
7885
7886 /*
7887 * The map has never been unlocked, so it's safe
7888 * to move to the next entry rather than doing
7889 * another lookup.
7890 */
7891
7892 goto CopySuccessful;
7893 }
7894
7895 /*
7896 * Take an object reference, so that we may
7897 * release the map lock(s).
7898 */
7899
7900 assert(src_object != VM_OBJECT_NULL);
7901 vm_object_reference(src_object);
7902
7903 /*
7904 * Record the timestamp for later verification.
7905 * Unlock the map.
7906 */
7907
7908 version.main_timestamp = src_map->timestamp;
7909 vm_map_unlock(src_map); /* Increments timestamp once! */
7910
7911 /*
7912 * Perform the copy
7913 */
7914
7915 if (was_wired) {
7916 CopySlowly:
7917 vm_object_lock(src_object);
7918 result = vm_object_copy_slowly(
7919 src_object,
7920 src_offset,
7921 src_size,
7922 THREAD_UNINT,
7923 &new_entry->object.vm_object);
7924 new_entry->offset = 0;
7925 new_entry->needs_copy = FALSE;
7926
7927 }
7928 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
7929 (tmp_entry->is_shared || map_share)) {
7930 vm_object_t new_object;
7931
7932 vm_object_lock_shared(src_object);
7933 new_object = vm_object_copy_delayed(
7934 src_object,
7935 src_offset,
7936 src_size,
7937 TRUE);
7938 if (new_object == VM_OBJECT_NULL)
7939 goto CopySlowly;
7940
7941 new_entry->object.vm_object = new_object;
7942 new_entry->needs_copy = TRUE;
7943 result = KERN_SUCCESS;
7944
7945 } else {
7946 result = vm_object_copy_strategically(src_object,
7947 src_offset,
7948 src_size,
7949 &new_entry->object.vm_object,
7950 &new_entry->offset,
7951 &new_entry_needs_copy);
7952
7953 new_entry->needs_copy = new_entry_needs_copy;
7954 }
7955
7956 if (result != KERN_SUCCESS &&
7957 result != KERN_MEMORY_RESTART_COPY) {
7958 vm_map_lock(src_map);
7959 RETURN(result);
7960 }
7961
7962 /*
7963 * Throw away the extra reference
7964 */
7965
7966 vm_object_deallocate(src_object);
7967
7968 /*
7969 * Verify that the map has not substantially
7970 * changed while the copy was being made.
7971 */
7972
7973 vm_map_lock(src_map);
7974
7975 if ((version.main_timestamp + 1) == src_map->timestamp)
7976 goto VerificationSuccessful;
7977
7978 /*
7979 * Simple version comparison failed.
7980 *
7981 * Retry the lookup and verify that the
7982 * same object/offset are still present.
7983 *
7984 * [Note: a memory manager that colludes with
7985 * the calling task can detect that we have
7986 * cheated. While the map was unlocked, the
7987 * mapping could have been changed and restored.]
7988 */
7989
7990 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
7991 RETURN(KERN_INVALID_ADDRESS);
7992 }
7993
7994 src_entry = tmp_entry;
7995 vm_map_clip_start(src_map, src_entry, src_start);
7996
7997 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
7998 !use_maxprot) ||
7999 ((src_entry->max_protection & VM_PROT_READ) == 0))
8000 goto VerificationFailed;
8001
8002 if (src_entry->vme_end < new_entry->vme_end)
8003 src_size = (new_entry->vme_end = src_entry->vme_end) - src_start;
8004
8005 if ((src_entry->object.vm_object != src_object) ||
8006 (src_entry->offset != src_offset) ) {
8007
8008 /*
8009 * Verification failed.
8010 *
8011 * Start over with this top-level entry.
8012 */
8013
8014 VerificationFailed: ;
8015
8016 vm_object_deallocate(new_entry->object.vm_object);
8017 tmp_entry = src_entry;
8018 continue;
8019 }
8020
8021 /*
8022 * Verification succeeded.
8023 */
8024
8025 VerificationSuccessful: ;
8026
8027 if (result == KERN_MEMORY_RESTART_COPY)
8028 goto RestartCopy;
8029
8030 /*
8031 * Copy succeeded.
8032 */
8033
8034 CopySuccessful: ;
8035
8036 /*
8037 * Link in the new copy entry.
8038 */
8039
8040 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
8041 new_entry);
8042
8043 /*
8044 * Determine whether the entire region
8045 * has been copied.
8046 */
8047 src_base = src_start;
8048 src_start = new_entry->vme_end;
8049 new_entry = VM_MAP_ENTRY_NULL;
8050 while ((src_start >= src_end) && (src_end != 0)) {
8051 if (src_map != base_map) {
8052 submap_map_t *ptr;
8053
8054 ptr = parent_maps;
8055 assert(ptr != NULL);
8056 parent_maps = parent_maps->next;
8057
8058 /* fix up the damage we did in that submap */
8059 vm_map_simplify_range(src_map,
8060 src_base,
8061 src_end);
8062
8063 vm_map_unlock(src_map);
8064 vm_map_deallocate(src_map);
8065 vm_map_lock(ptr->parent_map);
8066 src_map = ptr->parent_map;
8067 src_base = ptr->base_start;
8068 src_start = ptr->base_start + ptr->base_len;
8069 src_end = ptr->base_end;
8070 if ((src_end > src_start) &&
8071 !vm_map_lookup_entry(
8072 src_map, src_start, &tmp_entry))
8073 RETURN(KERN_INVALID_ADDRESS);
8074 kfree(ptr, sizeof(submap_map_t));
8075 if(parent_maps == NULL)
8076 map_share = FALSE;
8077 src_entry = tmp_entry->vme_prev;
8078 } else
8079 break;
8080 }
8081 if ((src_start >= src_end) && (src_end != 0))
8082 break;
8083
8084 /*
8085 * Verify that there are no gaps in the region
8086 */
8087
8088 tmp_entry = src_entry->vme_next;
8089 if ((tmp_entry->vme_start != src_start) ||
8090 (tmp_entry == vm_map_to_entry(src_map)))
8091 RETURN(KERN_INVALID_ADDRESS);
8092 }
8093
8094 /*
8095 * If the source should be destroyed, do it now, since the
8096 * copy was successful.
8097 */
8098 if (src_destroy) {
8099 (void) vm_map_delete(src_map,
8100 vm_map_trunc_page(src_addr),
8101 src_end,
8102 (src_map == kernel_map) ?
8103 VM_MAP_REMOVE_KUNWIRE :
8104 VM_MAP_NO_FLAGS,
8105 VM_MAP_NULL);
8106 } else {
8107 /* fix up the damage we did in the base map */
8108 vm_map_simplify_range(src_map,
8109 vm_map_trunc_page(src_addr),
8110 vm_map_round_page(src_end));
8111 }
8112
8113 vm_map_unlock(src_map);
8114
8115 /* Fix up the start and end points in the copy. This is necessary */
8116 /* when the various entries in the copy object were picked */
8117 /* up from different sub-maps. */
8118
8119 tmp_entry = vm_map_copy_first_entry(copy);
8120 while (tmp_entry != vm_map_copy_to_entry(copy)) {
8121 tmp_entry->vme_end = copy_addr +
8122 (tmp_entry->vme_end - tmp_entry->vme_start);
8123 tmp_entry->vme_start = copy_addr;
8124 assert(tmp_entry->vme_start < tmp_entry->vme_end);
8125 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
8126 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
8127 }
8128
8129 *copy_result = copy;
8130 return(KERN_SUCCESS);
8131
8132 #undef RETURN
8133 }
8134
8135 /*
8136 * vm_map_copyin_object:
8137 *
8138 * Create a copy object from an object.
8139 * Our caller donates an object reference.
8140 */
8141
8142 kern_return_t
8143 vm_map_copyin_object(
8144 vm_object_t object,
8145 vm_object_offset_t offset, /* offset of region in object */
8146 vm_object_size_t size, /* size of region in object */
8147 vm_map_copy_t *copy_result) /* OUT */
8148 {
8149 vm_map_copy_t copy; /* Resulting copy */
8150
8151 /*
8152 * We drop the object into a special copy object
8153 * that contains the object directly.
8154 */
8155
8156 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8157 copy->type = VM_MAP_COPY_OBJECT;
8158 copy->cpy_object = object;
8159 copy->offset = offset;
8160 copy->size = size;
8161
8162 *copy_result = copy;
8163 return(KERN_SUCCESS);
8164 }
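
/*
 * Illustrative sketch (not part of the original source): a minimal
 * caller of vm_map_copyin_object().  The function and variable names
 * are assumptions made for the example; only the call itself comes
 * from the routine above.  Because the caller donates its object
 * reference, it must not release "obj" once the call succeeds.
 */
#if 0	/* example only -- not compiled */
static kern_return_t
example_wrap_object_in_copy(
	vm_object_t		obj,		/* caller-held reference */
	vm_object_size_t	size,
	vm_map_copy_t		*copy_out)
{
	/* the resulting copy object takes over the reference on "obj" */
	return vm_map_copyin_object(obj, 0, size, copy_out);
}
#endif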
8165
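/*
 * vm_map_fork_share:
 *
 * Share "old_entry" of "old_map" with "new_map" at fork time:
 * make sure the entry has a suitable VM object (allocating or
 * shadowing one if necessary), switch symmetric objects to the
 * delayed copy strategy, clone the entry with both copies marked
 * shared, and copy or nest the translations into the child's pmap.
 */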
8166 static void
8167 vm_map_fork_share(
8168 vm_map_t old_map,
8169 vm_map_entry_t old_entry,
8170 vm_map_t new_map)
8171 {
8172 vm_object_t object;
8173 vm_map_entry_t new_entry;
8174
8175 /*
8176 * New sharing code. New map entry
8177 * references original object. Internal
8178 * objects use asynchronous copy algorithm for
8179 * future copies. First make sure we have
8180 * the right object. If we need a shadow,
8181 * or someone else already has one, then
8182 * make a new shadow and share it.
8183 */
8184
8185 object = old_entry->object.vm_object;
8186 if (old_entry->is_sub_map) {
8187 assert(old_entry->wired_count == 0);
8188 #ifndef NO_NESTED_PMAP
8189 if(old_entry->use_pmap) {
8190 kern_return_t result;
8191
8192 result = pmap_nest(new_map->pmap,
8193 (old_entry->object.sub_map)->pmap,
8194 (addr64_t)old_entry->vme_start,
8195 (addr64_t)old_entry->vme_start,
8196 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
8197 if(result)
8198 panic("vm_map_fork_share: pmap_nest failed!");
8199 }
8200 #endif /* NO_NESTED_PMAP */
8201 } else if (object == VM_OBJECT_NULL) {
8202 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
8203 old_entry->vme_start));
8204 old_entry->offset = 0;
8205 old_entry->object.vm_object = object;
8206 assert(!old_entry->needs_copy);
8207 } else if (object->copy_strategy !=
8208 MEMORY_OBJECT_COPY_SYMMETRIC) {
8209
8210 /*
8211 * We are already using an asymmetric
8212 * copy, and therefore we already have
8213 * the right object.
8214 */
8215
8216 assert(! old_entry->needs_copy);
8217 }
8218 else if (old_entry->needs_copy || /* case 1 */
8219 object->shadowed || /* case 2 */
8220 (!object->true_share && /* case 3 */
8221 !old_entry->is_shared &&
8222 (object->vo_size >
8223 (vm_map_size_t)(old_entry->vme_end -
8224 old_entry->vme_start)))) {
8225
8226 /*
8227 * We need to create a shadow.
8228 * There are three cases here.
8229 * In the first case, we need to
8230 * complete a deferred symmetrical
8231 * copy that we participated in.
8232 * In the second and third cases,
8233 * we need to create the shadow so
8234 * that changes that we make to the
8235 * object do not interfere with
8236 * any symmetrical copies which
8237 * have occurred (case 2) or which
8238 * might occur (case 3).
8239 *
8240 * The first case is when we had
8241 * deferred shadow object creation
8242 * via the entry->needs_copy mechanism.
8243 * This mechanism only works when
8244 * only one entry points to the source
8245 * object, and we are about to create
8246 * a second entry pointing to the
8247 * same object. The problem is that
8248 * there is no way of mapping from
8249 * an object to the entries pointing
8250 * to it. (Deferred shadow creation
8251 * works with one entry because it occurs
8252 * at fault time, and we walk from the
8253 * entry to the object when handling
8254 * the fault.)
8255 *
8256 * The second case is when the object
8257 * to be shared has already been copied
8258 * with a symmetric copy, but we point
8259 * directly to the object without
8260 * needs_copy set in our entry. (This
8261 * can happen because different ranges
8262 * of an object can be pointed to by
8263 * different entries. In particular,
8264 * a single entry pointing to an object
8265 * can be split by a call to vm_inherit,
8266 * which, combined with task_create, can
8267 * result in the different entries
8268 * having different needs_copy values.)
8269 * The shadowed flag in the object allows
8270 * us to detect this case. The problem
8271 * with this case is that if this object
8272 * has or will have shadows, then we
8273 * must not perform an asymmetric copy
8274 * of this object, since such a copy
8275 * allows the object to be changed, which
8276 * will break the previous symmetrical
8277 * copies (which rely upon the object
8278 * not changing). In a sense, the shadowed
8279 * flag says "don't change this object".
8280 * We fix this by creating a shadow
8281 * object for this object, and sharing
8282 * that. This works because we are free
8283 * to change the shadow object (and thus
8284 * to use an asymmetric copy strategy);
8285 * this is also semantically correct,
8286 * since this object is temporary, and
8287 * therefore a copy of the object is
8288 * as good as the object itself. (This
8289 * is not true for permanent objects,
8290 * since the pager needs to see changes,
8291 * which won't happen if the changes
8292 * are made to a copy.)
8293 *
8294 * The third case is when the object
8295 * to be shared has parts sticking
8296 * outside of the entry we're working
8297 * with, and thus may in the future
8298 * be subject to a symmetrical copy.
8299 * (This is a preemptive version of
8300 * case 2.)
8301 */
8302 vm_object_shadow(&old_entry->object.vm_object,
8303 &old_entry->offset,
8304 (vm_map_size_t) (old_entry->vme_end -
8305 old_entry->vme_start));
8306
8307 /*
8308 * If we're making a shadow for other than
8309 * copy on write reasons, then we have
8310 * to remove write permission.
8311 */
8312
8313 if (!old_entry->needs_copy &&
8314 (old_entry->protection & VM_PROT_WRITE)) {
8315 vm_prot_t prot;
8316
8317 prot = old_entry->protection & ~VM_PROT_WRITE;
8318
8319 if (override_nx(old_map, old_entry->alias) && prot)
8320 prot |= VM_PROT_EXECUTE;
8321
8322 if (old_map->mapped_in_other_pmaps) {
8323 vm_object_pmap_protect(
8324 old_entry->object.vm_object,
8325 old_entry->offset,
8326 (old_entry->vme_end -
8327 old_entry->vme_start),
8328 PMAP_NULL,
8329 old_entry->vme_start,
8330 prot);
8331 } else {
8332 pmap_protect(old_map->pmap,
8333 old_entry->vme_start,
8334 old_entry->vme_end,
8335 prot);
8336 }
8337 }
8338
8339 old_entry->needs_copy = FALSE;
8340 object = old_entry->object.vm_object;
8341 }
8342
8343
8344 /*
8345 * If object was using a symmetric copy strategy,
8346 * change its copy strategy to the default
8347 * asymmetric copy strategy, which is copy_delay
8348 * in the non-norma case and copy_call in the
8349 * norma case. Bump the reference count for the
8350 * new entry.
8351 */
8352
8353 if(old_entry->is_sub_map) {
8354 vm_map_lock(old_entry->object.sub_map);
8355 vm_map_reference(old_entry->object.sub_map);
8356 vm_map_unlock(old_entry->object.sub_map);
8357 } else {
8358 vm_object_lock(object);
8359 vm_object_reference_locked(object);
8360 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
8361 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
8362 }
8363 vm_object_unlock(object);
8364 }
8365
8366 /*
8367 * Clone the entry, using object ref from above.
8368 * Mark both entries as shared.
8369 */
8370
8371 new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
8372 * map or descendants */
8373 vm_map_entry_copy(new_entry, old_entry);
8374 old_entry->is_shared = TRUE;
8375 new_entry->is_shared = TRUE;
8376
8377 /*
8378 * Insert the entry into the new map -- we
8379 * know we're inserting at the end of the new
8380 * map.
8381 */
8382
8383 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
8384
8385 /*
8386 * Update the physical map
8387 */
8388
8389 if (old_entry->is_sub_map) {
8390 /* Bill Angell pmap support goes here */
8391 } else {
8392 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
8393 old_entry->vme_end - old_entry->vme_start,
8394 old_entry->vme_start);
8395 }
8396 }
8397
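/*
 * vm_map_fork_copy:
 *
 * Copy-inheritance slow path for vm_map_fork().  Copies the region
 * covered by "*old_entry_p" out of "old_map" with vm_map_copyin_maxprot()
 * and inserts the result into "new_map".  Returns TRUE if the copy was
 * inserted and advances "*old_entry_p" past the copied region; returns
 * FALSE if the region turned out to be unreadable and should simply be
 * skipped.
 */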
8398 static boolean_t
8399 vm_map_fork_copy(
8400 vm_map_t old_map,
8401 vm_map_entry_t *old_entry_p,
8402 vm_map_t new_map)
8403 {
8404 vm_map_entry_t old_entry = *old_entry_p;
8405 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
8406 vm_map_offset_t start = old_entry->vme_start;
8407 vm_map_copy_t copy;
8408 vm_map_entry_t last = vm_map_last_entry(new_map);
8409
8410 vm_map_unlock(old_map);
8411 /*
8412 * Use maxprot version of copyin because we
8413 * care about whether this memory can ever
8414 * be accessed, not just whether it's accessible
8415 * right now.
8416 */
8417 if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, &copy)
8418 != KERN_SUCCESS) {
8419 /*
8420 * The map might have changed while it
8421 * was unlocked, check it again. Skip
8422 * any blank space or permanently
8423 * unreadable region.
8424 */
8425 vm_map_lock(old_map);
8426 if (!vm_map_lookup_entry(old_map, start, &last) ||
8427 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
8428 last = last->vme_next;
8429 }
8430 *old_entry_p = last;
8431
8432 /*
8433 * XXX For some error returns, want to
8434 * XXX skip to the next element. Note
8435 * that INVALID_ADDRESS and
8436 * PROTECTION_FAILURE are handled above.
8437 */
8438
8439 return FALSE;
8440 }
8441
8442 /*
8443 * Insert the copy into the new map
8444 */
8445
8446 vm_map_copy_insert(new_map, last, copy);
8447
8448 /*
8449 * Pick up the traversal at the end of
8450 * the copied region.
8451 */
8452
8453 vm_map_lock(old_map);
8454 start += entry_size;
8455 if (! vm_map_lookup_entry(old_map, start, &last)) {
8456 last = last->vme_next;
8457 } else {
8458 if (last->vme_start == start) {
8459 /*
8460 * No need to clip here and we don't
8461 * want to cause any unnecessary
8462 * unnesting...
8463 */
8464 } else {
8465 vm_map_clip_start(old_map, last, start);
8466 }
8467 }
8468 *old_entry_p = last;
8469
8470 return TRUE;
8471 }
8472
8473 /*
8474 * vm_map_fork:
8475 *
8476 * Create and return a new map based on the old
8477 * map, according to the inheritance values on the
8478 * regions in that map.
8479 *
8480 * The source map must not be locked.
8481 */
8482 vm_map_t
8483 vm_map_fork(
8484 ledger_t ledger,
8485 vm_map_t old_map)
8486 {
8487 pmap_t new_pmap;
8488 vm_map_t new_map;
8489 vm_map_entry_t old_entry;
8490 vm_map_size_t new_size = 0, entry_size;
8491 vm_map_entry_t new_entry;
8492 boolean_t src_needs_copy;
8493 boolean_t new_entry_needs_copy;
8494
8495 new_pmap = pmap_create(ledger, (vm_map_size_t) 0,
8496 #if defined(__i386__) || defined(__x86_64__)
8497 old_map->pmap->pm_task_map != TASK_MAP_32BIT
8498 #else
8499 #error Unknown architecture.
8500 #endif
8501 );
8502 #if defined(__i386__)
8503 if (old_map->pmap->pm_task_map == TASK_MAP_64BIT_SHARED)
8504 pmap_set_4GB_pagezero(new_pmap);
8505 #endif
8506
8507 vm_map_reference_swap(old_map);
8508 vm_map_lock(old_map);
8509
8510 new_map = vm_map_create(new_pmap,
8511 old_map->min_offset,
8512 old_map->max_offset,
8513 old_map->hdr.entries_pageable);
8514 for (
8515 old_entry = vm_map_first_entry(old_map);
8516 old_entry != vm_map_to_entry(old_map);
8517 ) {
8518
8519 entry_size = old_entry->vme_end - old_entry->vme_start;
8520
8521 switch (old_entry->inheritance) {
8522 case VM_INHERIT_NONE:
8523 break;
8524
8525 case VM_INHERIT_SHARE:
8526 vm_map_fork_share(old_map, old_entry, new_map);
8527 new_size += entry_size;
8528 break;
8529
8530 case VM_INHERIT_COPY:
8531
8532 /*
8533 * Inline the copy_quickly case;
8534 * upon failure, fall back on a call
8535 * to vm_map_fork_copy.
8536 */
8537
8538 if(old_entry->is_sub_map)
8539 break;
8540 if ((old_entry->wired_count != 0) ||
8541 ((old_entry->object.vm_object != NULL) &&
8542 (old_entry->object.vm_object->true_share))) {
8543 goto slow_vm_map_fork_copy;
8544 }
8545
8546 new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
8547 vm_map_entry_copy(new_entry, old_entry);
8548 /* clear address space specifics */
8549 new_entry->use_pmap = FALSE;
8550
8551 if (! vm_object_copy_quickly(
8552 &new_entry->object.vm_object,
8553 old_entry->offset,
8554 (old_entry->vme_end -
8555 old_entry->vme_start),
8556 &src_needs_copy,
8557 &new_entry_needs_copy)) {
8558 vm_map_entry_dispose(new_map, new_entry);
8559 goto slow_vm_map_fork_copy;
8560 }
8561
8562 /*
8563 * Handle copy-on-write obligations
8564 */
8565
8566 if (src_needs_copy && !old_entry->needs_copy) {
8567 vm_prot_t prot;
8568
8569 prot = old_entry->protection & ~VM_PROT_WRITE;
8570
8571 if (override_nx(old_map, old_entry->alias) && prot)
8572 prot |= VM_PROT_EXECUTE;
8573
8574 vm_object_pmap_protect(
8575 old_entry->object.vm_object,
8576 old_entry->offset,
8577 (old_entry->vme_end -
8578 old_entry->vme_start),
8579 ((old_entry->is_shared
8580 || old_map->mapped_in_other_pmaps)
8581 ? PMAP_NULL :
8582 old_map->pmap),
8583 old_entry->vme_start,
8584 prot);
8585
8586 old_entry->needs_copy = TRUE;
8587 }
8588 new_entry->needs_copy = new_entry_needs_copy;
8589
8590 /*
8591 * Insert the entry at the end
8592 * of the map.
8593 */
8594
8595 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map),
8596 new_entry);
8597 new_size += entry_size;
8598 break;
8599
8600 slow_vm_map_fork_copy:
8601 if (vm_map_fork_copy(old_map, &old_entry, new_map)) {
8602 new_size += entry_size;
8603 }
8604 continue;
8605 }
8606 old_entry = old_entry->vme_next;
8607 }
8608
8609 new_map->size = new_size;
8610 vm_map_unlock(old_map);
8611 vm_map_deallocate(old_map);
8612
8613 return(new_map);
8614 }
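
/*
 * Illustrative sketch (not part of the original source): the typical
 * fork-time use of vm_map_fork().  The helper name is an assumption for
 * the example.  The parent map is not consumed -- vm_map_fork() takes
 * and later drops its own reference -- and each entry appears in the
 * child map according to its inheritance: VM_INHERIT_NONE entries are
 * omitted, VM_INHERIT_SHARE entries are shared via vm_map_fork_share(),
 * and VM_INHERIT_COPY entries are copied (typically copy-on-write).
 */
#if 0	/* example only -- not compiled */
static vm_map_t
example_fork_address_space(
	ledger_t	child_ledger,
	vm_map_t	parent_map)
{
	/* returns a new map, with its own pmap, sized per the inherited entries */
	return vm_map_fork(child_ledger, parent_map);
}
#endif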
8615
8616 /*
8617 * vm_map_exec:
8618 *
8619 * Set up the "new_map" with the proper execution environment according
8620 * to the type of executable (platform, 64bit, chroot environment).
8621 * Map the comm page and shared region, etc...
8622 */
8623 kern_return_t
8624 vm_map_exec(
8625 vm_map_t new_map,
8626 task_t task,
8627 void *fsroot,
8628 cpu_type_t cpu)
8629 {
8630 SHARED_REGION_TRACE_DEBUG(
8631 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
8632 current_task(), new_map, task, fsroot, cpu));
8633 (void) vm_commpage_enter(new_map, task);
8634 (void) vm_shared_region_enter(new_map, task, fsroot, cpu);
8635 SHARED_REGION_TRACE_DEBUG(
8636 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
8637 current_task(), new_map, task, fsroot, cpu));
8638 return KERN_SUCCESS;
8639 }
8640
8641 /*
8642 * vm_map_lookup_locked:
8643 *
8644 * Finds the VM object, offset, and
8645 * protection for a given virtual address in the
8646 * specified map, assuming a page fault of the
8647 * type specified.
8648 *
8649 * Returns the (object, offset, protection) for
8650 * this address, whether it is wired down, and whether
8651 * this map has the only reference to the data in question.
8652 * In order to later verify this lookup, a "version"
8653 * is returned.
8654 *
8655 * The map MUST be locked by the caller and WILL be
8656 * locked on exit. In order to guarantee the
8657 * existence of the returned object, it is returned
8658 * locked.
8659 *
8660 * If a lookup is requested with "write protection"
8661 * specified, the map may be changed to perform virtual
8662 * copying operations, although the data referenced will
8663 * remain the same.
8664 */
8665 kern_return_t
8666 vm_map_lookup_locked(
8667 vm_map_t *var_map, /* IN/OUT */
8668 vm_map_offset_t vaddr,
8669 vm_prot_t fault_type,
8670 int object_lock_type,
8671 vm_map_version_t *out_version, /* OUT */
8672 vm_object_t *object, /* OUT */
8673 vm_object_offset_t *offset, /* OUT */
8674 vm_prot_t *out_prot, /* OUT */
8675 boolean_t *wired, /* OUT */
8676 vm_object_fault_info_t fault_info, /* OUT */
8677 vm_map_t *real_map)
8678 {
8679 vm_map_entry_t entry;
8680 register vm_map_t map = *var_map;
8681 vm_map_t old_map = *var_map;
8682 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
8683 vm_map_offset_t cow_parent_vaddr = 0;
8684 vm_map_offset_t old_start = 0;
8685 vm_map_offset_t old_end = 0;
8686 register vm_prot_t prot;
8687 boolean_t mask_protections;
8688 vm_prot_t original_fault_type;
8689
8690 /*
8691 * VM_PROT_MASK means that the caller wants us to use "fault_type"
8692 * as a mask against the mapping's actual protections, not as an
8693 * absolute value.
8694 */
8695 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
8696 fault_type &= ~VM_PROT_IS_MASK;
8697 original_fault_type = fault_type;
8698
8699 *real_map = map;
8700
8701 RetryLookup:
8702 fault_type = original_fault_type;
8703
8704 /*
8705 * If the map has an interesting hint, try it before calling
8706 * full blown lookup routine.
8707 */
8708 entry = map->hint;
8709
8710 if ((entry == vm_map_to_entry(map)) ||
8711 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
8712 vm_map_entry_t tmp_entry;
8713
8714 /*
8715 * Entry was either not a valid hint, or the vaddr
8716 * was not contained in the entry, so do a full lookup.
8717 */
8718 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
8719 if((cow_sub_map_parent) && (cow_sub_map_parent != map))
8720 vm_map_unlock(cow_sub_map_parent);
8721 if((*real_map != map)
8722 && (*real_map != cow_sub_map_parent))
8723 vm_map_unlock(*real_map);
8724 return KERN_INVALID_ADDRESS;
8725 }
8726
8727 entry = tmp_entry;
8728 }
8729 if(map == old_map) {
8730 old_start = entry->vme_start;
8731 old_end = entry->vme_end;
8732 }
8733
8734 /*
8735 * Handle submaps. Drop lock on upper map, submap is
8736 * returned locked.
8737 */
8738
8739 submap_recurse:
8740 if (entry->is_sub_map) {
8741 vm_map_offset_t local_vaddr;
8742 vm_map_offset_t end_delta;
8743 vm_map_offset_t start_delta;
8744 vm_map_entry_t submap_entry;
8745 boolean_t mapped_needs_copy=FALSE;
8746
8747 local_vaddr = vaddr;
8748
8749 if ((entry->use_pmap && !(fault_type & VM_PROT_WRITE))) {
8750 /* if real_map equals map we unlock below */
8751 if ((*real_map != map) &&
8752 (*real_map != cow_sub_map_parent))
8753 vm_map_unlock(*real_map);
8754 *real_map = entry->object.sub_map;
8755 }
8756
8757 if(entry->needs_copy && (fault_type & VM_PROT_WRITE)) {
8758 if (!mapped_needs_copy) {
8759 if (vm_map_lock_read_to_write(map)) {
8760 vm_map_lock_read(map);
8761 /* XXX FBDP: entry still valid ? */
8762 if(*real_map == entry->object.sub_map)
8763 *real_map = map;
8764 goto RetryLookup;
8765 }
8766 vm_map_lock_read(entry->object.sub_map);
8767 cow_sub_map_parent = map;
8768 /* reset base to map before cow object */
8769 /* this is the map which will accept */
8770 /* the new cow object */
8771 old_start = entry->vme_start;
8772 old_end = entry->vme_end;
8773 cow_parent_vaddr = vaddr;
8774 mapped_needs_copy = TRUE;
8775 } else {
8776 vm_map_lock_read(entry->object.sub_map);
8777 if((cow_sub_map_parent != map) &&
8778 (*real_map != map))
8779 vm_map_unlock(map);
8780 }
8781 } else {
8782 vm_map_lock_read(entry->object.sub_map);
8783 /* Leave the map locked if it is the target */
8784 /* of a cow sub_map above; otherwise, just */
8785 /* follow the maps down to the object. */
8786 /* Here we unlock knowing we are not */
8787 /* revisiting the map. */
8788 if((*real_map != map) && (map != cow_sub_map_parent))
8789 vm_map_unlock_read(map);
8790 }
8791
8792 /* XXX FBDP: map has been unlocked, what protects "entry" !? */
8793 *var_map = map = entry->object.sub_map;
8794
8795 /* calculate the offset in the submap for vaddr */
8796 local_vaddr = (local_vaddr - entry->vme_start) + entry->offset;
8797
8798 RetrySubMap:
8799 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
8800 if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
8801 vm_map_unlock(cow_sub_map_parent);
8802 }
8803 if((*real_map != map)
8804 && (*real_map != cow_sub_map_parent)) {
8805 vm_map_unlock(*real_map);
8806 }
8807 *real_map = map;
8808 return KERN_INVALID_ADDRESS;
8809 }
8810
8811 /* find the attenuated shadow of the underlying object */
8812 /* on our target map */
8813
8814 /* In English: the submap object may extend beyond the */
8815 /* region mapped by the entry, or may fill only a portion */
8816 /* of it. For our purposes, we only care if the object */
8817 /* doesn't fill the entry. In that case the area which will */
8818 /* ultimately be clipped in the top map only needs */
8819 /* to be as big as the portion of the underlying entry */
8820 /* which is actually mapped. */
8821 start_delta = submap_entry->vme_start > entry->offset ?
8822 submap_entry->vme_start - entry->offset : 0;
8823
8824 end_delta =
8825 (entry->offset + start_delta + (old_end - old_start)) <=
8826 submap_entry->vme_end ?
8827 0 : (entry->offset +
8828 (old_end - old_start))
8829 - submap_entry->vme_end;
8830
8831 old_start += start_delta;
8832 old_end -= end_delta;
8833
8834 if(submap_entry->is_sub_map) {
8835 entry = submap_entry;
8836 vaddr = local_vaddr;
8837 goto submap_recurse;
8838 }
8839
8840 if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) {
8841
8842 vm_object_t sub_object, copy_object;
8843 vm_object_offset_t copy_offset;
8844 vm_map_offset_t local_start;
8845 vm_map_offset_t local_end;
8846 boolean_t copied_slowly = FALSE;
8847
8848 if (vm_map_lock_read_to_write(map)) {
8849 vm_map_lock_read(map);
8850 old_start -= start_delta;
8851 old_end += end_delta;
8852 goto RetrySubMap;
8853 }
8854
8855
8856 sub_object = submap_entry->object.vm_object;
8857 if (sub_object == VM_OBJECT_NULL) {
8858 sub_object =
8859 vm_object_allocate(
8860 (vm_map_size_t)
8861 (submap_entry->vme_end -
8862 submap_entry->vme_start));
8863 submap_entry->object.vm_object = sub_object;
8864 submap_entry->offset = 0;
8865 }
8866 local_start = local_vaddr -
8867 (cow_parent_vaddr - old_start);
8868 local_end = local_vaddr +
8869 (old_end - cow_parent_vaddr);
8870 vm_map_clip_start(map, submap_entry, local_start);
8871 vm_map_clip_end(map, submap_entry, local_end);
8872 /* unnesting was done in vm_map_clip_start/end() */
8873 assert(!submap_entry->use_pmap);
8874
8875 /* This is the COW case, let's connect */
8876 /* an entry in our space to the underlying */
8877 /* object in the submap, bypassing the */
8878 /* submap. */
8879
8880
8881 if(submap_entry->wired_count != 0 ||
8882 (sub_object->copy_strategy ==
8883 MEMORY_OBJECT_COPY_NONE)) {
8884 vm_object_lock(sub_object);
8885 vm_object_copy_slowly(sub_object,
8886 submap_entry->offset,
8887 (submap_entry->vme_end -
8888 submap_entry->vme_start),
8889 FALSE,
8890 &copy_object);
8891 copied_slowly = TRUE;
8892 } else {
8893
8894 /* set up shadow object */
8895 copy_object = sub_object;
8896 vm_object_reference(copy_object);
8897 sub_object->shadowed = TRUE;
8898 submap_entry->needs_copy = TRUE;
8899
8900 prot = submap_entry->protection & ~VM_PROT_WRITE;
8901
8902 if (override_nx(old_map, submap_entry->alias) && prot)
8903 prot |= VM_PROT_EXECUTE;
8904
8905 vm_object_pmap_protect(
8906 sub_object,
8907 submap_entry->offset,
8908 submap_entry->vme_end -
8909 submap_entry->vme_start,
8910 (submap_entry->is_shared
8911 || map->mapped_in_other_pmaps) ?
8912 PMAP_NULL : map->pmap,
8913 submap_entry->vme_start,
8914 prot);
8915 }
8916
8917 /*
8918 * Adjust the fault offset to the submap entry.
8919 */
8920 copy_offset = (local_vaddr -
8921 submap_entry->vme_start +
8922 submap_entry->offset);
8923
8924 /* This works differently from the */
8925 /* normal submap case. We go back */
8926 /* to the parent of the cow map and */
8927 /* clip out the target portion of */
8928 /* the sub_map, substituting the */
8929 /* new copy object. */
8930
8931 vm_map_unlock(map);
8932 local_start = old_start;
8933 local_end = old_end;
8934 map = cow_sub_map_parent;
8935 *var_map = cow_sub_map_parent;
8936 vaddr = cow_parent_vaddr;
8937 cow_sub_map_parent = NULL;
8938
8939 if(!vm_map_lookup_entry(map,
8940 vaddr, &entry)) {
8941 vm_object_deallocate(
8942 copy_object);
8943 vm_map_lock_write_to_read(map);
8944 return KERN_INVALID_ADDRESS;
8945 }
8946
8947 /* clip out the portion of space */
8948 /* mapped by the sub map which */
8949 /* corresponds to the underlying */
8950 /* object */
8951
8952 /*
8953 * Clip (and unnest) the smallest nested chunk
8954 * possible around the faulting address...
8955 */
8956 local_start = vaddr & ~(pmap_nesting_size_min - 1);
8957 local_end = local_start + pmap_nesting_size_min;
8958 /*
8959 * ... but don't go beyond the "old_start" to "old_end"
8960 * range, to avoid spanning over another VM region
8961 * with a possibly different VM object and/or offset.
8962 */
8963 if (local_start < old_start) {
8964 local_start = old_start;
8965 }
8966 if (local_end > old_end) {
8967 local_end = old_end;
8968 }
8969 /*
8970 * Adjust copy_offset to the start of the range.
8971 */
8972 copy_offset -= (vaddr - local_start);
8973
8974 vm_map_clip_start(map, entry, local_start);
8975 vm_map_clip_end(map, entry, local_end);
8976 /* unnesting was done in vm_map_clip_start/end() */
8977 assert(!entry->use_pmap);
8978
8979 /* substitute copy object for */
8980 /* shared map entry */
8981 vm_map_deallocate(entry->object.sub_map);
8982 entry->is_sub_map = FALSE;
8983 entry->object.vm_object = copy_object;
8984
8985 /* propagate the submap entry's protections */
8986 entry->protection |= submap_entry->protection;
8987 entry->max_protection |= submap_entry->max_protection;
8988
8989 if(copied_slowly) {
8990 entry->offset = local_start - old_start;
8991 entry->needs_copy = FALSE;
8992 entry->is_shared = FALSE;
8993 } else {
8994 entry->offset = copy_offset;
8995 entry->needs_copy = TRUE;
8996 if(entry->inheritance == VM_INHERIT_SHARE)
8997 entry->inheritance = VM_INHERIT_COPY;
8998 if (map != old_map)
8999 entry->is_shared = TRUE;
9000 }
9001 if(entry->inheritance == VM_INHERIT_SHARE)
9002 entry->inheritance = VM_INHERIT_COPY;
9003
9004 vm_map_lock_write_to_read(map);
9005 } else {
9006 if((cow_sub_map_parent)
9007 && (cow_sub_map_parent != *real_map)
9008 && (cow_sub_map_parent != map)) {
9009 vm_map_unlock(cow_sub_map_parent);
9010 }
9011 entry = submap_entry;
9012 vaddr = local_vaddr;
9013 }
9014 }
9015
9016 /*
9017 * Check whether this task is allowed to have
9018 * this page.
9019 */
9020
9021 prot = entry->protection;
9022
9023 if (override_nx(old_map, entry->alias) && prot) {
9024 /*
9025 * HACK -- if not a stack, then allow execution
9026 */
9027 prot |= VM_PROT_EXECUTE;
9028 }
9029
9030 if (mask_protections) {
9031 fault_type &= prot;
9032 if (fault_type == VM_PROT_NONE) {
9033 goto protection_failure;
9034 }
9035 }
9036 if ((fault_type & (prot)) != fault_type) {
9037 protection_failure:
9038 if (*real_map != map) {
9039 vm_map_unlock(*real_map);
9040 }
9041 *real_map = map;
9042
9043 if ((fault_type & VM_PROT_EXECUTE) && prot)
9044 log_stack_execution_failure((addr64_t)vaddr, prot);
9045
9046 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
9047 return KERN_PROTECTION_FAILURE;
9048 }
9049
9050 /*
9051 * If this page is not pageable, we have to get
9052 * it for all possible accesses.
9053 */
9054
9055 *wired = (entry->wired_count != 0);
9056 if (*wired)
9057 fault_type = prot;
9058
9059 /*
9060 * If the entry was copy-on-write, we either ...
9061 */
9062
9063 if (entry->needs_copy) {
9064 /*
9065 * If we want to write the page, we may as well
9066 * handle that now since we've got the map locked.
9067 *
9068 * If we don't need to write the page, we just
9069 * demote the permissions allowed.
9070 */
9071
9072 if ((fault_type & VM_PROT_WRITE) || *wired) {
9073 /*
9074 * Make a new object, and place it in the
9075 * object chain. Note that no new references
9076 * have appeared -- one just moved from the
9077 * map to the new object.
9078 */
9079
9080 if (vm_map_lock_read_to_write(map)) {
9081 vm_map_lock_read(map);
9082 goto RetryLookup;
9083 }
9084 vm_object_shadow(&entry->object.vm_object,
9085 &entry->offset,
9086 (vm_map_size_t) (entry->vme_end -
9087 entry->vme_start));
9088
9089 entry->object.vm_object->shadowed = TRUE;
9090 entry->needs_copy = FALSE;
9091 vm_map_lock_write_to_read(map);
9092 }
9093 else {
9094 /*
9095 * We're attempting to read a copy-on-write
9096 * page -- don't allow writes.
9097 */
9098
9099 prot &= (~VM_PROT_WRITE);
9100 }
9101 }
9102
9103 /*
9104 * Create an object if necessary.
9105 */
9106 if (entry->object.vm_object == VM_OBJECT_NULL) {
9107
9108 if (vm_map_lock_read_to_write(map)) {
9109 vm_map_lock_read(map);
9110 goto RetryLookup;
9111 }
9112
9113 entry->object.vm_object = vm_object_allocate(
9114 (vm_map_size_t)(entry->vme_end - entry->vme_start));
9115 entry->offset = 0;
9116 vm_map_lock_write_to_read(map);
9117 }
9118
9119 /*
9120 * Return the object/offset from this entry. If the entry
9121 * was copy-on-write or empty, it has been fixed up. Also
9122 * return the protection.
9123 */
9124
9125 *offset = (vaddr - entry->vme_start) + entry->offset;
9126 *object = entry->object.vm_object;
9127 *out_prot = prot;
9128
9129 if (fault_info) {
9130 fault_info->interruptible = THREAD_UNINT; /* for now... */
9131 /* ... the caller will change "interruptible" if needed */
9132 fault_info->cluster_size = 0;
9133 fault_info->user_tag = entry->alias;
9134 fault_info->behavior = entry->behavior;
9135 fault_info->lo_offset = entry->offset;
9136 fault_info->hi_offset = (entry->vme_end - entry->vme_start) + entry->offset;
9137 fault_info->no_cache = entry->no_cache;
9138 fault_info->stealth = FALSE;
9139 fault_info->io_sync = FALSE;
9140 fault_info->cs_bypass = (entry->used_for_jit)? TRUE : FALSE;
9141 fault_info->mark_zf_absent = FALSE;
9142 fault_info->batch_pmap_op = FALSE;
9143 }
9144
9145 /*
9146 * Lock the object to prevent it from disappearing
9147 */
9148 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
9149 vm_object_lock(*object);
9150 else
9151 vm_object_lock_shared(*object);
9152
9153 /*
9154 * Save the version number
9155 */
9156
9157 out_version->main_timestamp = map->timestamp;
9158
9159 return KERN_SUCCESS;
9160 }
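
/*
 * Illustrative sketch (not part of the original source): the
 * lookup-then-verify pattern described above, as a fault-style caller
 * might use it.  The function name, the VM_PROT_READ fault type and the
 * KERN_ABORTED return are assumptions for the example.  The caller
 * read-locks the map, the lookup may switch "map" to a submap and
 * returns the object locked, and vm_map_verify() later checks that the
 * map timestamp has not changed.
 */
#if 0	/* example only -- not compiled */
static kern_return_t
example_probe_address(
	vm_map_t	map,
	vm_map_offset_t	vaddr)
{
	vm_map_version_t	version;
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_prot_t		prot;
	boolean_t		wired;
	vm_map_t		real_map;
	kern_return_t		kr;

	vm_map_lock_read(map);
	kr = vm_map_lookup_locked(&map, vaddr, VM_PROT_READ,
				  OBJECT_LOCK_EXCLUSIVE, &version,
				  &object, &offset, &prot, &wired,
				  NULL,		/* no fault_info wanted */
				  &real_map);
	if (kr != KERN_SUCCESS) {
		vm_map_unlock_read(map);	/* lookup leaves "map" locked */
		return kr;
	}

	/* ... consult "object", "offset" and "prot" here ... */

	vm_object_unlock(object);
	if (real_map != map)
		vm_map_unlock(real_map);
	vm_map_unlock_read(map);

	/* later: make sure the translation is still valid */
	if (vm_map_verify(map, &version)) {
		/* map unchanged and still read-locked here */
		vm_map_verify_done(map, &version);
		return KERN_SUCCESS;
	}
	return KERN_ABORTED;
}
#endif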
9161
9162
9163 /*
9164 * vm_map_verify:
9165 *
9166 * Verifies that the map in question has not changed
9167 * since the given version. If successful, the map
9168 * will not change until vm_map_verify_done() is called.
9169 */
9170 boolean_t
9171 vm_map_verify(
9172 register vm_map_t map,
9173 register vm_map_version_t *version) /* REF */
9174 {
9175 boolean_t result;
9176
9177 vm_map_lock_read(map);
9178 result = (map->timestamp == version->main_timestamp);
9179
9180 if (!result)
9181 vm_map_unlock_read(map);
9182
9183 return(result);
9184 }
9185
9186 /*
9187 * vm_map_verify_done:
9188 *
9189 * Releases locks acquired by a vm_map_verify.
9190 *
9191 * This is now a macro in vm/vm_map.h. It does a
9192 * vm_map_unlock_read on the map.
9193 */
9194
9195
9196 /*
9197 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
9198 * Goes away after regular vm_region_recurse function migrates to
9199 * 64 bits
9200 * vm_region_recurse: A form of vm_region which follows the
9201 * submaps in a target map
9202 *
9203 */
9204
9205 kern_return_t
9206 vm_map_region_recurse_64(
9207 vm_map_t map,
9208 vm_map_offset_t *address, /* IN/OUT */
9209 vm_map_size_t *size, /* OUT */
9210 natural_t *nesting_depth, /* IN/OUT */
9211 vm_region_submap_info_64_t submap_info, /* IN/OUT */
9212 mach_msg_type_number_t *count) /* IN/OUT */
9213 {
9214 vm_region_extended_info_data_t extended;
9215 vm_map_entry_t tmp_entry;
9216 vm_map_offset_t user_address;
9217 unsigned int user_max_depth;
9218
9219 /*
9220 * "curr_entry" is the VM map entry preceding or including the
9221 * address we're looking for.
9222 * "curr_map" is the map or sub-map containing "curr_entry".
9223 * "curr_address" is the equivalent of the top map's "user_address"
9224 * in the current map.
9225 * "curr_offset" is the cumulated offset of "curr_map" in the
9226 * target task's address space.
9227 * "curr_depth" is the depth of "curr_map" in the chain of
9228 * sub-maps.
9229 *
9230 * "curr_max_below" and "curr_max_above" limit the range (around
9231 * "curr_address") we should take into account in the current (sub)map.
9232 * They limit the range to what's visible through the map entries
9233 * we've traversed from the top map to the current map.
9234 *
9235 */
9236 vm_map_entry_t curr_entry;
9237 vm_map_address_t curr_address;
9238 vm_map_offset_t curr_offset;
9239 vm_map_t curr_map;
9240 unsigned int curr_depth;
9241 vm_map_offset_t curr_max_below, curr_max_above;
9242 vm_map_offset_t curr_skip;
9243
9244 /*
9245 * "next_" is the same as "curr_" but for the VM region immediately
9246 * after the address we're looking for. We need to keep track of this
9247 * too because we want to return info about that region if the
9248 * address we're looking for is not mapped.
9249 */
9250 vm_map_entry_t next_entry;
9251 vm_map_offset_t next_offset;
9252 vm_map_offset_t next_address;
9253 vm_map_t next_map;
9254 unsigned int next_depth;
9255 vm_map_offset_t next_max_below, next_max_above;
9256 vm_map_offset_t next_skip;
9257
9258 boolean_t look_for_pages;
9259 vm_region_submap_short_info_64_t short_info;
9260
9261 if (map == VM_MAP_NULL) {
9262 /* no address space to work on */
9263 return KERN_INVALID_ARGUMENT;
9264 }
9265
9266 if (*count < VM_REGION_SUBMAP_INFO_COUNT_64) {
9267 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
9268 /*
9269 * "info" structure is not big enough and
9270 * would overflow
9271 */
9272 return KERN_INVALID_ARGUMENT;
9273 } else {
9274 look_for_pages = FALSE;
9275 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
9276 short_info = (vm_region_submap_short_info_64_t) submap_info;
9277 submap_info = NULL;
9278 }
9279 } else {
9280 look_for_pages = TRUE;
9281 *count = VM_REGION_SUBMAP_INFO_COUNT_64;
9282 short_info = NULL;
9283 }
9284
9285
9286 user_address = *address;
9287 user_max_depth = *nesting_depth;
9288
9289 curr_entry = NULL;
9290 curr_map = map;
9291 curr_address = user_address;
9292 curr_offset = 0;
9293 curr_skip = 0;
9294 curr_depth = 0;
9295 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
9296 curr_max_below = curr_address;
9297
9298 next_entry = NULL;
9299 next_map = NULL;
9300 next_address = 0;
9301 next_offset = 0;
9302 next_skip = 0;
9303 next_depth = 0;
9304 next_max_above = (vm_map_offset_t) -1;
9305 next_max_below = (vm_map_offset_t) -1;
9306
9307 if (not_in_kdp) {
9308 vm_map_lock_read(curr_map);
9309 }
9310
9311 for (;;) {
9312 if (vm_map_lookup_entry(curr_map,
9313 curr_address,
9314 &tmp_entry)) {
9315 /* tmp_entry contains the address we're looking for */
9316 curr_entry = tmp_entry;
9317 } else {
9318 vm_map_offset_t skip;
9319 /*
9320 * The address is not mapped. "tmp_entry" is the
9321 * map entry preceding the address. We want the next
9322 * one, if it exists.
9323 */
9324 curr_entry = tmp_entry->vme_next;
9325
9326 if (curr_entry == vm_map_to_entry(curr_map) ||
9327 (curr_entry->vme_start >=
9328 curr_address + curr_max_above)) {
9329 /* no next entry at this level: stop looking */
9330 if (not_in_kdp) {
9331 vm_map_unlock_read(curr_map);
9332 }
9333 curr_entry = NULL;
9334 curr_map = NULL;
9335 curr_offset = 0;
9336 curr_depth = 0;
9337 curr_max_above = 0;
9338 curr_max_below = 0;
9339 break;
9340 }
9341
9342 /* adjust current address and offset */
9343 skip = curr_entry->vme_start - curr_address;
9344 curr_address = curr_entry->vme_start;
9345 curr_skip = skip;
9346 curr_offset += skip;
9347 curr_max_above -= skip;
9348 curr_max_below = 0;
9349 }
9350
9351 /*
9352 * Is the next entry at this level closer to the address (or
9353 * deeper in the submap chain) than the one we had
9354 * so far?
9355 */
9356 tmp_entry = curr_entry->vme_next;
9357 if (tmp_entry == vm_map_to_entry(curr_map)) {
9358 /* no next entry at this level */
9359 } else if (tmp_entry->vme_start >=
9360 curr_address + curr_max_above) {
9361 /*
9362 * tmp_entry is beyond the scope of what we mapped of
9363 * this submap in the upper level: ignore it.
9364 */
9365 } else if ((next_entry == NULL) ||
9366 (tmp_entry->vme_start + curr_offset <=
9367 next_entry->vme_start + next_offset)) {
9368 /*
9369 * We didn't have a "next_entry" or this one is
9370 * closer to the address we're looking for:
9371 * use this "tmp_entry" as the new "next_entry".
9372 */
9373 if (next_entry != NULL) {
9374 /* unlock the last "next_map" */
9375 if (next_map != curr_map && not_in_kdp) {
9376 vm_map_unlock_read(next_map);
9377 }
9378 }
9379 next_entry = tmp_entry;
9380 next_map = curr_map;
9381 next_depth = curr_depth;
9382 next_address = next_entry->vme_start;
9383 next_skip = curr_skip;
9384 next_offset = curr_offset;
9385 next_offset += (next_address - curr_address);
9386 next_max_above = MIN(next_max_above, curr_max_above);
9387 next_max_above = MIN(next_max_above,
9388 next_entry->vme_end - next_address);
9389 next_max_below = MIN(next_max_below, curr_max_below);
9390 next_max_below = MIN(next_max_below,
9391 next_address - next_entry->vme_start);
9392 }
9393
9394 /*
9395 * "curr_max_{above,below}" allow us to keep track of the
9396 * portion of the submap that is actually mapped at this level:
9397 * the rest of that submap is irrelevant to us, since it's not
9398 * mapped here.
9399 * The relevant portion of the submap starts at
9400 * "curr_entry->offset" and spans the size of "curr_entry".
9401 */
9402 curr_max_above = MIN(curr_max_above,
9403 curr_entry->vme_end - curr_address);
9404 curr_max_below = MIN(curr_max_below,
9405 curr_address - curr_entry->vme_start);
9406
9407 if (!curr_entry->is_sub_map ||
9408 curr_depth >= user_max_depth) {
9409 /*
9410 * We hit a leaf map or we reached the maximum depth
9411 * we could, so stop looking. Keep the current map
9412 * locked.
9413 */
9414 break;
9415 }
9416
9417 /*
9418 * Get down to the next submap level.
9419 */
9420
9421 /*
9422 * Lock the next level and unlock the current level,
9423 * unless we need to keep it locked to access the "next_entry"
9424 * later.
9425 */
9426 if (not_in_kdp) {
9427 vm_map_lock_read(curr_entry->object.sub_map);
9428 }
9429 if (curr_map == next_map) {
9430 /* keep "next_map" locked in case we need it */
9431 } else {
9432 /* release this map */
9433 if (not_in_kdp)
9434 vm_map_unlock_read(curr_map);
9435 }
9436
9437 /*
9438 * Adjust the offset. "curr_entry" maps the submap
9439 * at relative address "curr_entry->vme_start" in the
9440 * curr_map but skips the first "curr_entry->offset"
9441 * bytes of the submap.
9442 * "curr_offset" always represents the offset of a virtual
9443 * address in the curr_map relative to the absolute address
9444 * space (i.e. the top-level VM map).
9445 */
9446 curr_offset +=
9447 (curr_entry->offset - curr_entry->vme_start);
9448 curr_address = user_address + curr_offset;
9449 /* switch to the submap */
9450 curr_map = curr_entry->object.sub_map;
9451 curr_depth++;
9452 curr_entry = NULL;
9453 }
9454
9455 if (curr_entry == NULL) {
9456 /* no VM region contains the address... */
9457 if (next_entry == NULL) {
9458 /* ... and no VM region follows it either */
9459 return KERN_INVALID_ADDRESS;
9460 }
9461 /* ... gather info about the next VM region */
9462 curr_entry = next_entry;
9463 curr_map = next_map; /* still locked ... */
9464 curr_address = next_address;
9465 curr_skip = next_skip;
9466 curr_offset = next_offset;
9467 curr_depth = next_depth;
9468 curr_max_above = next_max_above;
9469 curr_max_below = next_max_below;
9470 if (curr_map == map) {
9471 user_address = curr_address;
9472 }
9473 } else {
9474 /* we won't need "next_entry" after all */
9475 if (next_entry != NULL) {
9476 /* release "next_map" */
9477 if (next_map != curr_map && not_in_kdp) {
9478 vm_map_unlock_read(next_map);
9479 }
9480 }
9481 }
9482 next_entry = NULL;
9483 next_map = NULL;
9484 next_offset = 0;
9485 next_skip = 0;
9486 next_depth = 0;
9487 next_max_below = -1;
9488 next_max_above = -1;
9489
9490 *nesting_depth = curr_depth;
9491 *size = curr_max_above + curr_max_below;
9492 *address = user_address + curr_skip - curr_max_below;
9493
9494 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
9495 // so probably should be a real 32b ID vs. ptr.
9496 // Current users just check for equality
9497 #define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)p)
9498
9499 if (look_for_pages) {
9500 submap_info->user_tag = curr_entry->alias;
9501 submap_info->offset = curr_entry->offset;
9502 submap_info->protection = curr_entry->protection;
9503 submap_info->inheritance = curr_entry->inheritance;
9504 submap_info->max_protection = curr_entry->max_protection;
9505 submap_info->behavior = curr_entry->behavior;
9506 submap_info->user_wired_count = curr_entry->user_wired_count;
9507 submap_info->is_submap = curr_entry->is_sub_map;
9508 submap_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
9509 } else {
9510 short_info->user_tag = curr_entry->alias;
9511 short_info->offset = curr_entry->offset;
9512 short_info->protection = curr_entry->protection;
9513 short_info->inheritance = curr_entry->inheritance;
9514 short_info->max_protection = curr_entry->max_protection;
9515 short_info->behavior = curr_entry->behavior;
9516 short_info->user_wired_count = curr_entry->user_wired_count;
9517 short_info->is_submap = curr_entry->is_sub_map;
9518 short_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
9519 }
9520
9521 extended.pages_resident = 0;
9522 extended.pages_swapped_out = 0;
9523 extended.pages_shared_now_private = 0;
9524 extended.pages_dirtied = 0;
9525 extended.external_pager = 0;
9526 extended.shadow_depth = 0;
9527
9528 if (not_in_kdp) {
9529 if (!curr_entry->is_sub_map) {
9530 vm_map_offset_t range_start, range_end;
9531 range_start = MAX((curr_address - curr_max_below),
9532 curr_entry->vme_start);
9533 range_end = MIN((curr_address + curr_max_above),
9534 curr_entry->vme_end);
9535 vm_map_region_walk(curr_map,
9536 range_start,
9537 curr_entry,
9538 (curr_entry->offset +
9539 (range_start -
9540 curr_entry->vme_start)),
9541 range_end - range_start,
9542 &extended,
9543 look_for_pages);
9544 if (extended.external_pager &&
9545 extended.ref_count == 2 &&
9546 extended.share_mode == SM_SHARED) {
9547 extended.share_mode = SM_PRIVATE;
9548 }
9549 } else {
9550 if (curr_entry->use_pmap) {
9551 extended.share_mode = SM_TRUESHARED;
9552 } else {
9553 extended.share_mode = SM_PRIVATE;
9554 }
9555 extended.ref_count =
9556 curr_entry->object.sub_map->ref_count;
9557 }
9558 }
9559
9560 if (look_for_pages) {
9561 submap_info->pages_resident = extended.pages_resident;
9562 submap_info->pages_swapped_out = extended.pages_swapped_out;
9563 submap_info->pages_shared_now_private =
9564 extended.pages_shared_now_private;
9565 submap_info->pages_dirtied = extended.pages_dirtied;
9566 submap_info->external_pager = extended.external_pager;
9567 submap_info->shadow_depth = extended.shadow_depth;
9568 submap_info->share_mode = extended.share_mode;
9569 submap_info->ref_count = extended.ref_count;
9570 } else {
9571 short_info->external_pager = extended.external_pager;
9572 short_info->shadow_depth = extended.shadow_depth;
9573 short_info->share_mode = extended.share_mode;
9574 short_info->ref_count = extended.ref_count;
9575 }
9576
9577 if (not_in_kdp) {
9578 vm_map_unlock_read(curr_map);
9579 }
9580
9581 return KERN_SUCCESS;
9582 }
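
/*
 * Illustrative sketch (not part of the original source): iterating over
 * the regions of a map with vm_map_region_recurse_64().  The function
 * name, the starting depth of 999 and the "address += size" stepping are
 * assumptions for the example.  "nesting_depth" is in/out: it caps how
 * deep to follow submaps and returns the depth at which the region was
 * found; "address" and "size" come back describing that region (or the
 * next one, if the given address was unmapped).
 */
#if 0	/* example only -- not compiled */
static void
example_walk_regions(
	vm_map_t	map)
{
	vm_map_offset_t			address = 0;
	vm_map_size_t			size;
	natural_t			depth;
	vm_region_submap_info_data_64_t	info;
	mach_msg_type_number_t		count;

	for (;;) {
		depth = 999;	/* follow submaps as deeply as possible */
		count = VM_REGION_SUBMAP_INFO_COUNT_64;
		if (vm_map_region_recurse_64(map, &address, &size, &depth,
					     &info, &count) != KERN_SUCCESS)
			break;
		/* ... consume info.protection, info.user_tag, ... */
		address += size;
	}
}
#endif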
9583
9584 /*
9585 * vm_region:
9586 *
9587 * User call to obtain information about a region in
9588 * a task's address map. Several flavors of information
9589 * are supported: basic (32- and 64-bit), extended and top.
9590 *
9591 * XXX The reserved and behavior fields cannot be filled
9592 * in until the vm merge from the IK is completed, and
9593 * vm_reserve is implemented.
9594 */
9595
9596 kern_return_t
9597 vm_map_region(
9598 vm_map_t map,
9599 vm_map_offset_t *address, /* IN/OUT */
9600 vm_map_size_t *size, /* OUT */
9601 vm_region_flavor_t flavor, /* IN */
9602 vm_region_info_t info, /* OUT */
9603 mach_msg_type_number_t *count, /* IN/OUT */
9604 mach_port_t *object_name) /* OUT */
9605 {
9606 vm_map_entry_t tmp_entry;
9607 vm_map_entry_t entry;
9608 vm_map_offset_t start;
9609
9610 if (map == VM_MAP_NULL)
9611 return(KERN_INVALID_ARGUMENT);
9612
9613 switch (flavor) {
9614
9615 case VM_REGION_BASIC_INFO:
9616 /* legacy flavor for old 32-bit object info */
9617 {
9618 vm_region_basic_info_t basic;
9619
9620 if (*count < VM_REGION_BASIC_INFO_COUNT)
9621 return(KERN_INVALID_ARGUMENT);
9622
9623 basic = (vm_region_basic_info_t) info;
9624 *count = VM_REGION_BASIC_INFO_COUNT;
9625
9626 vm_map_lock_read(map);
9627
9628 start = *address;
9629 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9630 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9631 vm_map_unlock_read(map);
9632 return(KERN_INVALID_ADDRESS);
9633 }
9634 } else {
9635 entry = tmp_entry;
9636 }
9637
9638 start = entry->vme_start;
9639
9640 basic->offset = (uint32_t)entry->offset;
9641 basic->protection = entry->protection;
9642 basic->inheritance = entry->inheritance;
9643 basic->max_protection = entry->max_protection;
9644 basic->behavior = entry->behavior;
9645 basic->user_wired_count = entry->user_wired_count;
9646 basic->reserved = entry->is_sub_map;
9647 *address = start;
9648 *size = (entry->vme_end - start);
9649
9650 if (object_name) *object_name = IP_NULL;
9651 if (entry->is_sub_map) {
9652 basic->shared = FALSE;
9653 } else {
9654 basic->shared = entry->is_shared;
9655 }
9656
9657 vm_map_unlock_read(map);
9658 return(KERN_SUCCESS);
9659 }
9660
9661 case VM_REGION_BASIC_INFO_64:
9662 {
9663 vm_region_basic_info_64_t basic;
9664
9665 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
9666 return(KERN_INVALID_ARGUMENT);
9667
9668 basic = (vm_region_basic_info_64_t) info;
9669 *count = VM_REGION_BASIC_INFO_COUNT_64;
9670
9671 vm_map_lock_read(map);
9672
9673 start = *address;
9674 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9675 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9676 vm_map_unlock_read(map);
9677 return(KERN_INVALID_ADDRESS);
9678 }
9679 } else {
9680 entry = tmp_entry;
9681 }
9682
9683 start = entry->vme_start;
9684
9685 basic->offset = entry->offset;
9686 basic->protection = entry->protection;
9687 basic->inheritance = entry->inheritance;
9688 basic->max_protection = entry->max_protection;
9689 basic->behavior = entry->behavior;
9690 basic->user_wired_count = entry->user_wired_count;
9691 basic->reserved = entry->is_sub_map;
9692 *address = start;
9693 *size = (entry->vme_end - start);
9694
9695 if (object_name) *object_name = IP_NULL;
9696 if (entry->is_sub_map) {
9697 basic->shared = FALSE;
9698 } else {
9699 basic->shared = entry->is_shared;
9700 }
9701
9702 vm_map_unlock_read(map);
9703 return(KERN_SUCCESS);
9704 }
9705 case VM_REGION_EXTENDED_INFO:
9706 {
9707 vm_region_extended_info_t extended;
9708
9709 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
9710 return(KERN_INVALID_ARGUMENT);
9711
9712 extended = (vm_region_extended_info_t) info;
9713 *count = VM_REGION_EXTENDED_INFO_COUNT;
9714
9715 vm_map_lock_read(map);
9716
9717 start = *address;
9718 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9719 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9720 vm_map_unlock_read(map);
9721 return(KERN_INVALID_ADDRESS);
9722 }
9723 } else {
9724 entry = tmp_entry;
9725 }
9726 start = entry->vme_start;
9727
9728 extended->protection = entry->protection;
9729 extended->user_tag = entry->alias;
9730 extended->pages_resident = 0;
9731 extended->pages_swapped_out = 0;
9732 extended->pages_shared_now_private = 0;
9733 extended->pages_dirtied = 0;
9734 extended->external_pager = 0;
9735 extended->shadow_depth = 0;
9736
9737 vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended, TRUE);
9738
9739 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
9740 extended->share_mode = SM_PRIVATE;
9741
9742 if (object_name)
9743 *object_name = IP_NULL;
9744 *address = start;
9745 *size = (entry->vme_end - start);
9746
9747 vm_map_unlock_read(map);
9748 return(KERN_SUCCESS);
9749 }
9750 case VM_REGION_TOP_INFO:
9751 {
9752 vm_region_top_info_t top;
9753
9754 if (*count < VM_REGION_TOP_INFO_COUNT)
9755 return(KERN_INVALID_ARGUMENT);
9756
9757 top = (vm_region_top_info_t) info;
9758 *count = VM_REGION_TOP_INFO_COUNT;
9759
9760 vm_map_lock_read(map);
9761
9762 start = *address;
9763 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
9764 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
9765 vm_map_unlock_read(map);
9766 return(KERN_INVALID_ADDRESS);
9767 }
9768 } else {
9769 entry = tmp_entry;
9770
9771 }
9772 start = entry->vme_start;
9773
9774 top->private_pages_resident = 0;
9775 top->shared_pages_resident = 0;
9776
9777 vm_map_region_top_walk(entry, top);
9778
9779 if (object_name)
9780 *object_name = IP_NULL;
9781 *address = start;
9782 *size = (entry->vme_end - start);
9783
9784 vm_map_unlock_read(map);
9785 return(KERN_SUCCESS);
9786 }
9787 default:
9788 return(KERN_INVALID_ARGUMENT);
9789 }
9790 }
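
/*
 * Illustrative sketch (not part of the original source): querying the
 * VM_REGION_BASIC_INFO_64 flavor for the region containing "addr".  The
 * function name is an assumption for the example.  On success "*info"
 * is filled in, "address" is moved back to the start of the containing
 * entry and "size" covers that whole entry.
 */
#if 0	/* example only -- not compiled */
static kern_return_t
example_region_basic_info(
	vm_map_t			map,
	vm_map_offset_t			addr,
	vm_region_basic_info_data_64_t	*info)
{
	vm_map_offset_t		address = addr;
	vm_map_size_t		size;
	mach_msg_type_number_t	count = VM_REGION_BASIC_INFO_COUNT_64;
	mach_port_t		object_name;

	return vm_map_region(map, &address, &size,
			     VM_REGION_BASIC_INFO_64,
			     (vm_region_info_t) info, &count,
			     &object_name);
}
#endif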
9791
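/*
 * OBJ_RESIDENT_COUNT yields the number of "obj"'s pages to report as
 * resident for an entry spanning "entry_size" pages: for an all-reusable
 * object only the wired pages count, otherwise the resident pages minus
 * the reusable ones, capped at the size of the entry.
 */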
9792 #define OBJ_RESIDENT_COUNT(obj, entry_size) \
9793 MIN((entry_size), \
9794 ((obj)->all_reusable ? \
9795 (obj)->wired_page_count : \
9796 (obj)->resident_page_count - (obj)->reusable_page_count))
9797
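/*
 * vm_map_region_top_walk:
 *
 * Fill in a vm_region_top_info structure for "entry": classify its
 * share mode and count the resident pages that are private to the
 * mapping versus shared through its shadow chain.
 */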
9798 void
9799 vm_map_region_top_walk(
9800 vm_map_entry_t entry,
9801 vm_region_top_info_t top)
9802 {
9803
9804 if (entry->object.vm_object == 0 || entry->is_sub_map) {
9805 top->share_mode = SM_EMPTY;
9806 top->ref_count = 0;
9807 top->obj_id = 0;
9808 return;
9809 }
9810
9811 {
9812 struct vm_object *obj, *tmp_obj;
9813 int ref_count;
9814 uint32_t entry_size;
9815
9816 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
9817
9818 obj = entry->object.vm_object;
9819
9820 vm_object_lock(obj);
9821
9822 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9823 ref_count--;
9824
9825 assert(obj->reusable_page_count <= obj->resident_page_count);
9826 if (obj->shadow) {
9827 if (ref_count == 1)
9828 top->private_pages_resident =
9829 OBJ_RESIDENT_COUNT(obj, entry_size);
9830 else
9831 top->shared_pages_resident =
9832 OBJ_RESIDENT_COUNT(obj, entry_size);
9833 top->ref_count = ref_count;
9834 top->share_mode = SM_COW;
9835
9836 while ((tmp_obj = obj->shadow)) {
9837 vm_object_lock(tmp_obj);
9838 vm_object_unlock(obj);
9839 obj = tmp_obj;
9840
9841 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9842 ref_count--;
9843
9844 assert(obj->reusable_page_count <= obj->resident_page_count);
9845 top->shared_pages_resident +=
9846 OBJ_RESIDENT_COUNT(obj, entry_size);
9847 top->ref_count += ref_count - 1;
9848 }
9849 } else {
9850 if (entry->superpage_size) {
9851 top->share_mode = SM_LARGE_PAGE;
9852 top->shared_pages_resident = 0;
9853 top->private_pages_resident = entry_size;
9854 } else if (entry->needs_copy) {
9855 top->share_mode = SM_COW;
9856 top->shared_pages_resident =
9857 OBJ_RESIDENT_COUNT(obj, entry_size);
9858 } else {
9859 if (ref_count == 1 ||
9860 (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
9861 top->share_mode = SM_PRIVATE;
9862 top->private_pages_resident =
9863 OBJ_RESIDENT_COUNT(obj,
9864 entry_size);
9865 } else {
9866 top->share_mode = SM_SHARED;
9867 top->shared_pages_resident =
9868 OBJ_RESIDENT_COUNT(obj,
9869 entry_size);
9870 }
9871 }
9872 top->ref_count = ref_count;
9873 }
9874 /* XXX K64: obj_id will be truncated */
9875 top->obj_id = (unsigned int) (uintptr_t)obj;
9876
9877 vm_object_unlock(obj);
9878 }
9879 }
9880
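/*
 * vm_map_region_walk:
 *
 * Gather extended region info for the given range of "entry": either
 * examine each page in the range (look_for_pages) to count resident,
 * dirtied and swapped-out pages, or just measure the shadow chain
 * depth, then derive the share mode (including the aliased variants,
 * by counting references from this map's entries).
 */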
9881 void
9882 vm_map_region_walk(
9883 vm_map_t map,
9884 vm_map_offset_t va,
9885 vm_map_entry_t entry,
9886 vm_object_offset_t offset,
9887 vm_object_size_t range,
9888 vm_region_extended_info_t extended,
9889 boolean_t look_for_pages)
9890 {
9891 register struct vm_object *obj, *tmp_obj;
9892 register vm_map_offset_t last_offset;
9893 register int i;
9894 register int ref_count;
9895 struct vm_object *shadow_object;
9896 int shadow_depth;
9897
9898 if ((entry->object.vm_object == 0) ||
9899 (entry->is_sub_map) ||
9900 (entry->object.vm_object->phys_contiguous &&
9901 !entry->superpage_size)) {
9902 extended->share_mode = SM_EMPTY;
9903 extended->ref_count = 0;
9904 return;
9905 }
9906
9907 if (entry->superpage_size) {
9908 extended->shadow_depth = 0;
9909 extended->share_mode = SM_LARGE_PAGE;
9910 extended->ref_count = 1;
9911 extended->external_pager = 0;
9912 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
9913 extended->shadow_depth = 0;
9914 return;
9915 }
9916
9917 {
9918 obj = entry->object.vm_object;
9919
9920 vm_object_lock(obj);
9921
9922 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9923 ref_count--;
9924
9925 if (look_for_pages) {
9926 for (last_offset = offset + range;
9927 offset < last_offset;
9928 offset += PAGE_SIZE_64, va += PAGE_SIZE)
9929 vm_map_region_look_for_page(map, va, obj,
9930 offset, ref_count,
9931 0, extended);
9932 } else {
9933 shadow_object = obj->shadow;
9934 shadow_depth = 0;
9935
9936 if ( !(obj->pager_trusted) && !(obj->internal))
9937 extended->external_pager = 1;
9938
9939 if (shadow_object != VM_OBJECT_NULL) {
9940 vm_object_lock(shadow_object);
9941 for (;
9942 shadow_object != VM_OBJECT_NULL;
9943 shadow_depth++) {
9944 vm_object_t next_shadow;
9945
9946 if ( !(shadow_object->pager_trusted) &&
9947 !(shadow_object->internal))
9948 extended->external_pager = 1;
9949
9950 next_shadow = shadow_object->shadow;
9951 if (next_shadow) {
9952 vm_object_lock(next_shadow);
9953 }
9954 vm_object_unlock(shadow_object);
9955 shadow_object = next_shadow;
9956 }
9957 }
9958 extended->shadow_depth = shadow_depth;
9959 }
9960
9961 if (extended->shadow_depth || entry->needs_copy)
9962 extended->share_mode = SM_COW;
9963 else {
9964 if (ref_count == 1)
9965 extended->share_mode = SM_PRIVATE;
9966 else {
9967 if (obj->true_share)
9968 extended->share_mode = SM_TRUESHARED;
9969 else
9970 extended->share_mode = SM_SHARED;
9971 }
9972 }
9973 extended->ref_count = ref_count - extended->shadow_depth;
9974
9975 for (i = 0; i < extended->shadow_depth; i++) {
9976 if ((tmp_obj = obj->shadow) == 0)
9977 break;
9978 vm_object_lock(tmp_obj);
9979 vm_object_unlock(obj);
9980
9981 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
9982 ref_count--;
9983
9984 extended->ref_count += ref_count;
9985 obj = tmp_obj;
9986 }
9987 vm_object_unlock(obj);
9988
9989 if (extended->share_mode == SM_SHARED) {
9990 register vm_map_entry_t cur;
9991 register vm_map_entry_t last;
9992 int my_refs;
9993
9994 obj = entry->object.vm_object;
9995 last = vm_map_to_entry(map);
9996 my_refs = 0;
9997
9998 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
9999 ref_count--;
10000 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
10001 my_refs += vm_map_region_count_obj_refs(cur, obj);
10002
10003 if (my_refs == ref_count)
10004 extended->share_mode = SM_PRIVATE_ALIASED;
10005 else if (my_refs > 1)
10006 extended->share_mode = SM_SHARED_ALIASED;
10007 }
10008 }
10009 }
10010
10011
10012 /* object is locked on entry and locked on return */
10013
10014
10015 static void
10016 vm_map_region_look_for_page(
10017 __unused vm_map_t map,
10018 __unused vm_map_offset_t va,
10019 vm_object_t object,
10020 vm_object_offset_t offset,
10021 int max_refcnt,
10022 int depth,
10023 vm_region_extended_info_t extended)
10024 {
10025 register vm_page_t p;
10026 register vm_object_t shadow;
10027 register int ref_count;
10028 vm_object_t caller_object;
10029 #if MACH_PAGEMAP
10030 kern_return_t kr;
10031 #endif
10032 shadow = object->shadow;
10033 caller_object = object;
10034
10035
10036 while (TRUE) {
10037
10038 if ( !(object->pager_trusted) && !(object->internal))
10039 extended->external_pager = 1;
10040
10041 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
10042 if (shadow && (max_refcnt == 1))
10043 extended->pages_shared_now_private++;
10044
10045 if (!p->fictitious &&
10046 (p->dirty || pmap_is_modified(p->phys_page)))
10047 extended->pages_dirtied++;
10048
10049 extended->pages_resident++;
10050
10051 if(object != caller_object)
10052 vm_object_unlock(object);
10053
10054 return;
10055 }
10056 #if MACH_PAGEMAP
10057 if (object->existence_map) {
10058 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) {
10059
10060 extended->pages_swapped_out++;
10061
10062 if(object != caller_object)
10063 vm_object_unlock(object);
10064
10065 return;
10066 }
10067 } else if (object->internal &&
10068 object->alive &&
10069 !object->terminating &&
10070 object->pager_ready) {
10071
10072 memory_object_t pager;
10073
10074 vm_object_paging_begin(object);
10075 pager = object->pager;
10076 vm_object_unlock(object);
10077
10078 kr = memory_object_data_request(
10079 pager,
10080 offset + object->paging_offset,
10081 0, /* just poke the pager */
10082 VM_PROT_READ,
10083 NULL);
10084
10085 vm_object_lock(object);
10086 vm_object_paging_end(object);
10087
10088 if (kr == KERN_SUCCESS) {
10089 /* the pager has that page */
10090 extended->pages_swapped_out++;
10091 if (object != caller_object)
10092 vm_object_unlock(object);
10093 return;
10094 }
10095 }
10096 #endif /* MACH_PAGEMAP */
10097
10098 if (shadow) {
10099 vm_object_lock(shadow);
10100
10101 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
10102 ref_count--;
10103
10104 if (++depth > extended->shadow_depth)
10105 extended->shadow_depth = depth;
10106
10107 if (ref_count > max_refcnt)
10108 max_refcnt = ref_count;
10109
10110 if(object != caller_object)
10111 vm_object_unlock(object);
10112
10113 offset = offset + object->vo_shadow_offset;
10114 object = shadow;
10115 shadow = object->shadow;
10116 continue;
10117 }
10118 if(object != caller_object)
10119 vm_object_unlock(object);
10120 break;
10121 }
10122 }
10123
10124 static int
10125 vm_map_region_count_obj_refs(
10126 vm_map_entry_t entry,
10127 vm_object_t object)
10128 {
10129 register int ref_count;
10130 register vm_object_t chk_obj;
10131 register vm_object_t tmp_obj;
10132
10133 if (entry->object.vm_object == 0)
10134 return(0);
10135
10136 if (entry->is_sub_map)
10137 return(0);
10138 else {
10139 ref_count = 0;
10140
10141 chk_obj = entry->object.vm_object;
10142 vm_object_lock(chk_obj);
10143
10144 while (chk_obj) {
10145 if (chk_obj == object)
10146 ref_count++;
10147 tmp_obj = chk_obj->shadow;
10148 if (tmp_obj)
10149 vm_object_lock(tmp_obj);
10150 vm_object_unlock(chk_obj);
10151
10152 chk_obj = tmp_obj;
10153 }
10154 }
10155 return(ref_count);
10156 }
10157
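/*
 * Illustrative sketch (editorial addition, not part of the original
 * source): the share_mode and page counts computed by
 * vm_map_region_walk() / vm_map_region_look_for_page() above are what a
 * user-space caller sees when asking for VM_REGION_EXTENDED_INFO.  A
 * minimal, hedged example of such a caller -- "some_addr" is a
 * placeholder for an address in the caller's own map:
 *
 *	#include <mach/mach.h>
 *	#include <mach/mach_vm.h>
 *
 *	mach_vm_address_t		addr = some_addr;
 *	mach_vm_size_t			size = 0;
 *	vm_region_extended_info_data_t	info;
 *	mach_msg_type_number_t		count = VM_REGION_EXTENDED_INFO_COUNT;
 *	mach_port_t			objname = MACH_PORT_NULL;
 *	kern_return_t			kr;
 *
 *	kr = mach_vm_region(mach_task_self(), &addr, &size,
 *			    VM_REGION_EXTENDED_INFO,
 *			    (vm_region_info_t) &info, &count, &objname);
 *
 * On success, info.share_mode is one of SM_PRIVATE, SM_COW, SM_SHARED,
 * SM_TRUESHARED, etc., and info.pages_resident, info.pages_dirtied and
 * info.shadow_depth are the fields filled in by the walk above.
 */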
10158
10159 /*
10160 * Routine: vm_map_simplify
10161 *
10162 * Description:
10163 * Attempt to simplify the map representation in
10164 * the vicinity of the given starting address.
10165 * Note:
10166 * This routine is intended primarily to keep the
10167 * kernel maps more compact -- they generally don't
10168 * benefit from the "expand a map entry" technology
10169 * at allocation time because the adjacent entry
10170 * is often wired down.
10171 */
10172 void
10173 vm_map_simplify_entry(
10174 vm_map_t map,
10175 vm_map_entry_t this_entry)
10176 {
10177 vm_map_entry_t prev_entry;
10178
10179 counter(c_vm_map_simplify_entry_called++);
10180
10181 prev_entry = this_entry->vme_prev;
10182
10183 if ((this_entry != vm_map_to_entry(map)) &&
10184 (prev_entry != vm_map_to_entry(map)) &&
10185
10186 (prev_entry->vme_end == this_entry->vme_start) &&
10187
10188 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
10189
10190 (prev_entry->object.vm_object == this_entry->object.vm_object) &&
10191 ((prev_entry->offset + (prev_entry->vme_end -
10192 prev_entry->vme_start))
10193 == this_entry->offset) &&
10194
10195 (prev_entry->inheritance == this_entry->inheritance) &&
10196 (prev_entry->protection == this_entry->protection) &&
10197 (prev_entry->max_protection == this_entry->max_protection) &&
10198 (prev_entry->behavior == this_entry->behavior) &&
10199 (prev_entry->alias == this_entry->alias) &&
10200 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
10201 (prev_entry->no_cache == this_entry->no_cache) &&
10202 (prev_entry->wired_count == this_entry->wired_count) &&
10203 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
10204
10205 (prev_entry->needs_copy == this_entry->needs_copy) &&
10206 (prev_entry->permanent == this_entry->permanent) &&
10207
10208 (prev_entry->use_pmap == FALSE) &&
10209 (this_entry->use_pmap == FALSE) &&
10210 (prev_entry->in_transition == FALSE) &&
10211 (this_entry->in_transition == FALSE) &&
10212 (prev_entry->needs_wakeup == FALSE) &&
10213 (this_entry->needs_wakeup == FALSE) &&
10214 (prev_entry->is_shared == FALSE) &&
10215 (this_entry->is_shared == FALSE)
10216 ) {
10217 vm_map_store_entry_unlink(map, prev_entry);
10218 assert(prev_entry->vme_start < this_entry->vme_end);
10219 this_entry->vme_start = prev_entry->vme_start;
10220 this_entry->offset = prev_entry->offset;
10221 if (prev_entry->is_sub_map) {
10222 vm_map_deallocate(prev_entry->object.sub_map);
10223 } else {
10224 vm_object_deallocate(prev_entry->object.vm_object);
10225 }
10226 vm_map_entry_dispose(map, prev_entry);
10227 SAVE_HINT_MAP_WRITE(map, this_entry);
10228 counter(c_vm_map_simplified++);
10229 }
10230 }
10231
10232 void
10233 vm_map_simplify(
10234 vm_map_t map,
10235 vm_map_offset_t start)
10236 {
10237 vm_map_entry_t this_entry;
10238
10239 vm_map_lock(map);
10240 if (vm_map_lookup_entry(map, start, &this_entry)) {
10241 vm_map_simplify_entry(map, this_entry);
10242 vm_map_simplify_entry(map, this_entry->vme_next);
10243 }
10244 counter(c_vm_map_simplify_called++);
10245 vm_map_unlock(map);
10246 }
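/*
 * Worked example (editorial addition): two adjacent entries that map
 * consecutive ranges of the same object with contiguous offsets and
 * identical attributes, e.g.
 *
 *	prev_entry:  [0x1000, 0x3000)  object O, offset 0x0
 *	this_entry:  [0x3000, 0x5000)  object O, offset 0x2000
 *
 * satisfy the checks in vm_map_simplify_entry() and are coalesced into
 * a single entry [0x1000, 0x5000) with offset 0x0, dropping one object
 * reference and one vm_map_entry.  The addresses are made up purely for
 * illustration.
 */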
10247
10248 static void
10249 vm_map_simplify_range(
10250 vm_map_t map,
10251 vm_map_offset_t start,
10252 vm_map_offset_t end)
10253 {
10254 vm_map_entry_t entry;
10255
10256 /*
10257 * The map should be locked (for "write") by the caller.
10258 */
10259
10260 if (start >= end) {
10261 /* invalid address range */
10262 return;
10263 }
10264
10265 start = vm_map_trunc_page(start);
10266 end = vm_map_round_page(end);
10267
10268 if (!vm_map_lookup_entry(map, start, &entry)) {
10269 /* "start" is not mapped and "entry" ends before "start" */
10270 if (entry == vm_map_to_entry(map)) {
10271 /* start with first entry in the map */
10272 entry = vm_map_first_entry(map);
10273 } else {
10274 /* start with next entry */
10275 entry = entry->vme_next;
10276 }
10277 }
10278
10279 while (entry != vm_map_to_entry(map) &&
10280 entry->vme_start <= end) {
10281 /* try and coalesce "entry" with its previous entry */
10282 vm_map_simplify_entry(map, entry);
10283 entry = entry->vme_next;
10284 }
10285 }
10286
10287
10288 /*
10289 * Routine: vm_map_machine_attribute
10290 * Purpose:
10291 * Provide machine-specific attributes to mappings,
10292 * such as cacheability etc. for machines that provide
10293 * them. NUMA architectures and machines with big/strange
10294 * caches will use this.
10295 * Note:
10296 * Responsibilities for locking and checking are handled here;
10297 * everything else is handled in the pmap module. If any non-volatile
10298 * information must be kept, the pmap module should handle
10299 * it itself. [This assumes that attributes do not
10300 * need to be inherited, which seems ok to me]
10301 */
10302 kern_return_t
10303 vm_map_machine_attribute(
10304 vm_map_t map,
10305 vm_map_offset_t start,
10306 vm_map_offset_t end,
10307 vm_machine_attribute_t attribute,
10308 vm_machine_attribute_val_t* value) /* IN/OUT */
10309 {
10310 kern_return_t ret;
10311 vm_map_size_t sync_size;
10312 vm_map_entry_t entry;
10313
10314 if (start < vm_map_min(map) || end > vm_map_max(map))
10315 return KERN_INVALID_ADDRESS;
10316
10317 /* Figure out how much memory we need to flush (in page increments) */
10318 sync_size = end - start;
10319
10320 vm_map_lock(map);
10321
10322 if (attribute != MATTR_CACHE) {
10323 /* If we don't have to find physical addresses, we */
10324 /* don't have to do an explicit traversal here. */
10325 ret = pmap_attribute(map->pmap, start, end-start,
10326 attribute, value);
10327 vm_map_unlock(map);
10328 return ret;
10329 }
10330
10331 ret = KERN_SUCCESS; /* Assume it all worked */
10332
10333 while(sync_size) {
10334 if (vm_map_lookup_entry(map, start, &entry)) {
10335 vm_map_size_t sub_size;
10336 if((entry->vme_end - start) > sync_size) {
10337 sub_size = sync_size;
10338 sync_size = 0;
10339 } else {
10340 sub_size = entry->vme_end - start;
10341 sync_size -= sub_size;
10342 }
10343 if(entry->is_sub_map) {
10344 vm_map_offset_t sub_start;
10345 vm_map_offset_t sub_end;
10346
10347 sub_start = (start - entry->vme_start)
10348 + entry->offset;
10349 sub_end = sub_start + sub_size;
10350 vm_map_machine_attribute(
10351 entry->object.sub_map,
10352 sub_start,
10353 sub_end,
10354 attribute, value);
10355 } else {
10356 if(entry->object.vm_object) {
10357 vm_page_t m;
10358 vm_object_t object;
10359 vm_object_t base_object;
10360 vm_object_t last_object;
10361 vm_object_offset_t offset;
10362 vm_object_offset_t base_offset;
10363 vm_map_size_t range;
10364 range = sub_size;
10365 offset = (start - entry->vme_start)
10366 + entry->offset;
10367 base_offset = offset;
10368 object = entry->object.vm_object;
10369 base_object = object;
10370 last_object = NULL;
10371
10372 vm_object_lock(object);
10373
10374 while (range) {
10375 m = vm_page_lookup(
10376 object, offset);
10377
10378 if (m && !m->fictitious) {
10379 ret =
10380 pmap_attribute_cache_sync(
10381 m->phys_page,
10382 PAGE_SIZE,
10383 attribute, value);
10384
10385 } else if (object->shadow) {
10386 offset = offset + object->vo_shadow_offset;
10387 last_object = object;
10388 object = object->shadow;
10389 vm_object_lock(last_object->shadow);
10390 vm_object_unlock(last_object);
10391 continue;
10392 }
10393 range -= PAGE_SIZE;
10394
10395 if (base_object != object) {
10396 vm_object_unlock(object);
10397 vm_object_lock(base_object);
10398 object = base_object;
10399 }
10400 /* Bump to the next page */
10401 base_offset += PAGE_SIZE;
10402 offset = base_offset;
10403 }
10404 vm_object_unlock(object);
10405 }
10406 }
10407 start += sub_size;
10408 } else {
10409 vm_map_unlock(map);
10410 return KERN_FAILURE;
10411 }
10412
10413 }
10414
10415 vm_map_unlock(map);
10416
10417 return ret;
10418 }
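/*
 * Illustrative sketch (editorial addition): a kernel caller that wants
 * the cache flushed for a mapped range could use MATTR_CACHE with
 * MATTR_VAL_CACHE_FLUSH.  "map", "start" and "size" are assumed to
 * describe an existing mapping; this is a hedged example, not a usage
 * taken from the original source:
 *
 *	vm_machine_attribute_val_t	val = MATTR_VAL_CACHE_FLUSH;
 *	kern_return_t			kr;
 *
 *	kr = vm_map_machine_attribute(map, start, start + size,
 *				      MATTR_CACHE, &val);
 *
 * Attributes other than MATTR_CACHE fall through to pmap_attribute()
 * without the per-page traversal performed above.
 */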
10419
10420 /*
10421 * vm_map_behavior_set:
10422 *
10423 * Sets the paging reference behavior of the specified address
10424 * range in the target map. Paging reference behavior affects
10425 * how pagein operations resulting from faults on the map will be
10426 * clustered.
10427 */
10428 kern_return_t
10429 vm_map_behavior_set(
10430 vm_map_t map,
10431 vm_map_offset_t start,
10432 vm_map_offset_t end,
10433 vm_behavior_t new_behavior)
10434 {
10435 register vm_map_entry_t entry;
10436 vm_map_entry_t temp_entry;
10437
10438 XPR(XPR_VM_MAP,
10439 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
10440 map, start, end, new_behavior, 0);
10441
10442 if (start > end ||
10443 start < vm_map_min(map) ||
10444 end > vm_map_max(map)) {
10445 return KERN_NO_SPACE;
10446 }
10447
10448 switch (new_behavior) {
10449
10450 /*
10451 * This first block of behaviors all set a persistent state on the specified
10452 * memory range. All we have to do here is to record the desired behavior
10453 * in the vm_map_entry_t's.
10454 */
10455
10456 case VM_BEHAVIOR_DEFAULT:
10457 case VM_BEHAVIOR_RANDOM:
10458 case VM_BEHAVIOR_SEQUENTIAL:
10459 case VM_BEHAVIOR_RSEQNTL:
10460 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
10461 vm_map_lock(map);
10462
10463 /*
10464 * The entire address range must be valid for the map.
10465 * Note that vm_map_range_check() does a
10466 * vm_map_lookup_entry() internally and returns the
10467 * entry containing the start of the address range if
10468 * the entire range is valid.
10469 */
10470 if (vm_map_range_check(map, start, end, &temp_entry)) {
10471 entry = temp_entry;
10472 vm_map_clip_start(map, entry, start);
10473 }
10474 else {
10475 vm_map_unlock(map);
10476 return(KERN_INVALID_ADDRESS);
10477 }
10478
10479 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
10480 vm_map_clip_end(map, entry, end);
10481 assert(!entry->use_pmap);
10482
10483 if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
10484 entry->zero_wired_pages = TRUE;
10485 } else {
10486 entry->behavior = new_behavior;
10487 }
10488 entry = entry->vme_next;
10489 }
10490
10491 vm_map_unlock(map);
10492 break;
10493
10494 /*
10495 * The rest of these are different from the above in that they cause
10496 * an immediate action to take place as opposed to setting a behavior that
10497 * affects future actions.
10498 */
10499
10500 case VM_BEHAVIOR_WILLNEED:
10501 return vm_map_willneed(map, start, end);
10502
10503 case VM_BEHAVIOR_DONTNEED:
10504 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
10505
10506 case VM_BEHAVIOR_FREE:
10507 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
10508
10509 case VM_BEHAVIOR_REUSABLE:
10510 return vm_map_reusable_pages(map, start, end);
10511
10512 case VM_BEHAVIOR_REUSE:
10513 return vm_map_reuse_pages(map, start, end);
10514
10515 case VM_BEHAVIOR_CAN_REUSE:
10516 return vm_map_can_reuse(map, start, end);
10517
10518 default:
10519 return(KERN_INVALID_ARGUMENT);
10520 }
10521
10522 return(KERN_SUCCESS);
10523 }
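/*
 * Illustrative sketch (editorial addition): the persistent behaviors
 * above are normally reached from user space through madvise(2).  For
 * example, a program about to scan a large mapping sequentially can
 * hint the pager ("buf" and "len" are placeholders for an existing
 * mapping):
 *
 *	if (madvise(buf, len, MADV_SEQUENTIAL) != 0)
 *		perror("madvise");
 *
 * On this platform madvise(MADV_SEQUENTIAL) is expected to arrive here
 * as VM_BEHAVIOR_SEQUENTIAL via the BSD layer; this is a hedged sketch
 * of the calling pattern, not code from the original source.
 */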
10524
10525
10526 /*
10527 * Internals for the madvise(MADV_WILLNEED) system call.
10528 *
10529 * The present implementation is to do a read-ahead if the mapping corresponds
10530 * to a mapped regular file. If it's an anonymous mapping, then we do nothing
10531 * and basically ignore the "advice" (which we are always free to do).
10532 */
10533
10534
10535 static kern_return_t
10536 vm_map_willneed(
10537 vm_map_t map,
10538 vm_map_offset_t start,
10539 vm_map_offset_t end
10540 )
10541 {
10542 vm_map_entry_t entry;
10543 vm_object_t object;
10544 memory_object_t pager;
10545 struct vm_object_fault_info fault_info;
10546 kern_return_t kr;
10547 vm_object_size_t len;
10548 vm_object_offset_t offset;
10549
10550 /*
10551 * Fill in static values in fault_info. Several fields get ignored by the code
10552 * we call, but we'll fill them in anyway since uninitialized fields are bad
10553 * when it comes to future backwards compatibility.
10554 */
10555
10556 fault_info.interruptible = THREAD_UNINT; /* ignored value */
10557 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
10558 fault_info.no_cache = FALSE; /* ignored value */
10559 fault_info.stealth = TRUE;
10560 fault_info.io_sync = FALSE;
10561 fault_info.cs_bypass = FALSE;
10562 fault_info.mark_zf_absent = FALSE;
10563 fault_info.batch_pmap_op = FALSE;
10564
10565 /*
10566 * The MADV_WILLNEED operation doesn't require any changes to the
10567 * vm_map_entry_t's, so the read lock is sufficient.
10568 */
10569
10570 vm_map_lock_read(map);
10571
10572 /*
10573 * The madvise semantics require that the address range be fully
10574 * allocated with no holes. Otherwise, we're required to return
10575 * an error.
10576 */
10577
10578 if (! vm_map_range_check(map, start, end, &entry)) {
10579 vm_map_unlock_read(map);
10580 return KERN_INVALID_ADDRESS;
10581 }
10582
10583 /*
10584 * Examine each vm_map_entry_t in the range.
10585 */
10586 for (; entry != vm_map_to_entry(map) && start < end; ) {
10587
10588 /*
10589 * The first time through, the start address could be anywhere
10590 * within the vm_map_entry we found. So adjust the offset to
10591 * correspond. After that, the offset will always be zero to
10592 * correspond to the beginning of the current vm_map_entry.
10593 */
10594 offset = (start - entry->vme_start) + entry->offset;
10595
10596 /*
10597 * Set the length so we don't go beyond the end of the
10598 * map_entry or beyond the end of the range we were given.
10599 * This range could also span multiple map entries, all of which
10600 * map different files, so make sure we only do the right amount
10601 * of I/O for each object. Note that it's possible for there
10602 * to be multiple map entries all referring to the same object
10603 * but with different page permissions, but it's not worth
10604 * trying to optimize that case.
10605 */
10606 len = MIN(entry->vme_end - start, end - start);
10607
10608 if ((vm_size_t) len != len) {
10609 /* 32-bit overflow */
10610 len = (vm_size_t) (0 - PAGE_SIZE);
10611 }
10612 fault_info.cluster_size = (vm_size_t) len;
10613 fault_info.lo_offset = offset;
10614 fault_info.hi_offset = offset + len;
10615 fault_info.user_tag = entry->alias;
10616
10617 /*
10618 * If there's no read permission to this mapping, then just
10619 * skip it.
10620 */
10621 if ((entry->protection & VM_PROT_READ) == 0) {
10622 entry = entry->vme_next;
10623 start = entry->vme_start;
10624 continue;
10625 }
10626
10627 /*
10628 * Find the file object backing this map entry. If there is
10629 * none, then we simply ignore the "will need" advice for this
10630 * entry and go on to the next one.
10631 */
10632 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
10633 entry = entry->vme_next;
10634 start = entry->vme_start;
10635 continue;
10636 }
10637
10638 /*
10639 * The data_request() could take a long time, so let's
10640 * release the map lock to avoid blocking other threads.
10641 */
10642 vm_map_unlock_read(map);
10643
10644 vm_object_paging_begin(object);
10645 pager = object->pager;
10646 vm_object_unlock(object);
10647
10648 /*
10649 * Get the data from the object asynchronously.
10650 *
10651 * Note that memory_object_data_request() places limits on the
10652 * amount of I/O it will do. Regardless of the len we
10653 * specified, it won't do more than MAX_UPL_TRANSFER and it
10654 * silently truncates the len to that size. This isn't
10655 * necessarily bad since madvise shouldn't really be used to
10656 * page in unlimited amounts of data. Other Unix variants
10657 * limit the willneed case as well. If this turns out to be an
10658 * issue for developers, then we can always adjust the policy
10659 * here and still be backwards compatible since this is all
10660 * just "advice".
10661 */
10662 kr = memory_object_data_request(
10663 pager,
10664 offset + object->paging_offset,
10665 0, /* ignored */
10666 VM_PROT_READ,
10667 (memory_object_fault_info_t)&fault_info);
10668
10669 vm_object_lock(object);
10670 vm_object_paging_end(object);
10671 vm_object_unlock(object);
10672
10673 /*
10674 * If we couldn't do the I/O for some reason, just give up on
10675 * the madvise. We still return success to the user since
10676 * madvise isn't supposed to fail when the advice can't be
10677 * taken.
10678 */
10679 if (kr != KERN_SUCCESS) {
10680 return KERN_SUCCESS;
10681 }
10682
10683 start += len;
10684 if (start >= end) {
10685 /* done */
10686 return KERN_SUCCESS;
10687 }
10688
10689 /* look up next entry */
10690 vm_map_lock_read(map);
10691 if (! vm_map_lookup_entry(map, start, &entry)) {
10692 /*
10693 * There's a new hole in the address range.
10694 */
10695 vm_map_unlock_read(map);
10696 return KERN_INVALID_ADDRESS;
10697 }
10698 }
10699
10700 vm_map_unlock_read(map);
10701 return KERN_SUCCESS;
10702 }
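/*
 * Illustrative sketch (editorial addition): from user space the
 * read-ahead above is triggered on a file-backed mapping, e.g.
 *
 *	int fd = open(path, O_RDONLY);
 *	void *p = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
 *	if (p != MAP_FAILED)
 *		(void) madvise(p, len, MADV_WILLNEED);
 *
 * "path" and "len" are placeholders.  For anonymous memory the advice
 * is accepted but, as described above, ignored.
 */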
10703
10704 static boolean_t
10705 vm_map_entry_is_reusable(
10706 vm_map_entry_t entry)
10707 {
10708 vm_object_t object;
10709
10710 if (entry->is_shared ||
10711 entry->is_sub_map ||
10712 entry->in_transition ||
10713 entry->protection != VM_PROT_DEFAULT ||
10714 entry->max_protection != VM_PROT_ALL ||
10715 entry->inheritance != VM_INHERIT_DEFAULT ||
10716 entry->no_cache ||
10717 entry->permanent ||
10718 entry->superpage_size != 0 ||
10719 entry->zero_wired_pages ||
10720 entry->wired_count != 0 ||
10721 entry->user_wired_count != 0) {
10722 return FALSE;
10723 }
10724
10725 object = entry->object.vm_object;
10726 if (object == VM_OBJECT_NULL) {
10727 return TRUE;
10728 }
10729 if (
10730 #if 0
10731 /*
10732 * Let's proceed even if the VM object is potentially
10733 * shared.
10734 * We check for this later when processing the actual
10735 * VM pages, so the contents will be safe if shared.
10736 *
10737 * But we can still mark this memory region as "reusable" to
10738 * acknowledge that the caller did let us know that the memory
10739 * could be re-used and should not be penalized for holding
10740 * on to it. This allows its "resident size" to not include
10741 * the reusable range.
10742 */
10743 object->ref_count == 1 &&
10744 #endif
10745 object->wired_page_count == 0 &&
10746 object->copy == VM_OBJECT_NULL &&
10747 object->shadow == VM_OBJECT_NULL &&
10748 object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
10749 object->internal &&
10750 !object->true_share &&
10751 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
10752 !object->code_signed) {
10753 return TRUE;
10754 }
10755 return FALSE;
10756
10757
10758 }
10759
10760 static kern_return_t
10761 vm_map_reuse_pages(
10762 vm_map_t map,
10763 vm_map_offset_t start,
10764 vm_map_offset_t end)
10765 {
10766 vm_map_entry_t entry;
10767 vm_object_t object;
10768 vm_object_offset_t start_offset, end_offset;
10769
10770 /*
10771 * The MADV_REUSE operation doesn't require any changes to the
10772 * vm_map_entry_t's, so the read lock is sufficient.
10773 */
10774
10775 vm_map_lock_read(map);
10776
10777 /*
10778 * The madvise semantics require that the address range be fully
10779 * allocated with no holes. Otherwise, we're required to return
10780 * an error.
10781 */
10782
10783 if (!vm_map_range_check(map, start, end, &entry)) {
10784 vm_map_unlock_read(map);
10785 vm_page_stats_reusable.reuse_pages_failure++;
10786 return KERN_INVALID_ADDRESS;
10787 }
10788
10789 /*
10790 * Examine each vm_map_entry_t in the range.
10791 */
10792 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10793 entry = entry->vme_next) {
10794 /*
10795 * Sanity check on the VM map entry.
10796 */
10797 if (! vm_map_entry_is_reusable(entry)) {
10798 vm_map_unlock_read(map);
10799 vm_page_stats_reusable.reuse_pages_failure++;
10800 return KERN_INVALID_ADDRESS;
10801 }
10802
10803 /*
10804 * The first time through, the start address could be anywhere
10805 * within the vm_map_entry we found. So adjust the offset to
10806 * correspond.
10807 */
10808 if (entry->vme_start < start) {
10809 start_offset = start - entry->vme_start;
10810 } else {
10811 start_offset = 0;
10812 }
10813 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
10814 start_offset += entry->offset;
10815 end_offset += entry->offset;
10816
10817 object = entry->object.vm_object;
10818 if (object != VM_OBJECT_NULL) {
10819 vm_object_lock(object);
10820 vm_object_reuse_pages(object, start_offset, end_offset,
10821 TRUE);
10822 vm_object_unlock(object);
10823 }
10824
10825 if (entry->alias == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
10826 /*
10827 * XXX
10828 * We do not hold the VM map exclusively here.
10829 * The "alias" field is not that critical, so it's
10830 * safe to update it here, as long as it is the only
10831 * one that can be modified while holding the VM map
10832 * "shared".
10833 */
10834 entry->alias = VM_MEMORY_MALLOC_LARGE_REUSED;
10835 }
10836 }
10837
10838 vm_map_unlock_read(map);
10839 vm_page_stats_reusable.reuse_pages_success++;
10840 return KERN_SUCCESS;
10841 }
10842
10843
10844 static kern_return_t
10845 vm_map_reusable_pages(
10846 vm_map_t map,
10847 vm_map_offset_t start,
10848 vm_map_offset_t end)
10849 {
10850 vm_map_entry_t entry;
10851 vm_object_t object;
10852 vm_object_offset_t start_offset, end_offset;
10853
10854 /*
10855 * The MADV_REUSABLE operation doesn't require any changes to the
10856 * vm_map_entry_t's, so the read lock is sufficient.
10857 */
10858
10859 vm_map_lock_read(map);
10860
10861 /*
10862 * The madvise semantics require that the address range be fully
10863 * allocated with no holes. Otherwise, we're required to return
10864 * an error.
10865 */
10866
10867 if (!vm_map_range_check(map, start, end, &entry)) {
10868 vm_map_unlock_read(map);
10869 vm_page_stats_reusable.reusable_pages_failure++;
10870 return KERN_INVALID_ADDRESS;
10871 }
10872
10873 /*
10874 * Examine each vm_map_entry_t in the range.
10875 */
10876 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10877 entry = entry->vme_next) {
10878 int kill_pages = 0;
10879
10880 /*
10881 * Sanity check on the VM map entry.
10882 */
10883 if (! vm_map_entry_is_reusable(entry)) {
10884 vm_map_unlock_read(map);
10885 vm_page_stats_reusable.reusable_pages_failure++;
10886 return KERN_INVALID_ADDRESS;
10887 }
10888
10889 /*
10890 * The first time through, the start address could be anywhere
10891 * within the vm_map_entry we found. So adjust the offset to
10892 * correspond.
10893 */
10894 if (entry->vme_start < start) {
10895 start_offset = start - entry->vme_start;
10896 } else {
10897 start_offset = 0;
10898 }
10899 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
10900 start_offset += entry->offset;
10901 end_offset += entry->offset;
10902
10903 object = entry->object.vm_object;
10904 if (object == VM_OBJECT_NULL)
10905 continue;
10906
10907
10908 vm_object_lock(object);
10909 if (object->ref_count == 1 && !object->shadow)
10910 kill_pages = 1;
10911 else
10912 kill_pages = -1;
10913 if (kill_pages != -1) {
10914 vm_object_deactivate_pages(object,
10915 start_offset,
10916 end_offset - start_offset,
10917 kill_pages,
10918 TRUE /*reusable_pages*/);
10919 } else {
10920 vm_page_stats_reusable.reusable_pages_shared++;
10921 }
10922 vm_object_unlock(object);
10923
10924 if (entry->alias == VM_MEMORY_MALLOC_LARGE ||
10925 entry->alias == VM_MEMORY_MALLOC_LARGE_REUSED) {
10926 /*
10927 * XXX
10928 * We do not hold the VM map exclusively here.
10929 * The "alias" field is not that critical, so it's
10930 * safe to update it here, as long as it is the only
10931 * one that can be modified while holding the VM map
10932 * "shared".
10933 */
10934 entry->alias = VM_MEMORY_MALLOC_LARGE_REUSABLE;
10935 }
10936 }
10937
10938 vm_map_unlock_read(map);
10939 vm_page_stats_reusable.reusable_pages_success++;
10940 return KERN_SUCCESS;
10941 }
10942
10943
10944 static kern_return_t
10945 vm_map_can_reuse(
10946 vm_map_t map,
10947 vm_map_offset_t start,
10948 vm_map_offset_t end)
10949 {
10950 vm_map_entry_t entry;
10951
10952 /*
10953 * The MADV_REUSABLE operation doesn't require any changes to the
10954 * vm_map_entry_t's, so the read lock is sufficient.
10955 */
10956
10957 vm_map_lock_read(map);
10958
10959 /*
10960 * The madvise semantics require that the address range be fully
10961 * allocated with no holes. Otherwise, we're required to return
10962 * an error.
10963 */
10964
10965 if (!vm_map_range_check(map, start, end, &entry)) {
10966 vm_map_unlock_read(map);
10967 vm_page_stats_reusable.can_reuse_failure++;
10968 return KERN_INVALID_ADDRESS;
10969 }
10970
10971 /*
10972 * Examine each vm_map_entry_t in the range.
10973 */
10974 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
10975 entry = entry->vme_next) {
10976 /*
10977 * Sanity check on the VM map entry.
10978 */
10979 if (! vm_map_entry_is_reusable(entry)) {
10980 vm_map_unlock_read(map);
10981 vm_page_stats_reusable.can_reuse_failure++;
10982 return KERN_INVALID_ADDRESS;
10983 }
10984 }
10985
10986 vm_map_unlock_read(map);
10987 vm_page_stats_reusable.can_reuse_success++;
10988 return KERN_SUCCESS;
10989 }
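/*
 * Illustrative sketch (editorial addition): the REUSABLE / REUSE /
 * CAN_REUSE operations above back the Darwin-specific madvise flags
 * that allocators use to mark freed-but-cached ranges.  A hedged sketch
 * of the expected calling pattern ("chunk" / "chunk_size" are
 * placeholders for a range the allocator owns):
 *
 *	(void) madvise(chunk, chunk_size, MADV_FREE_REUSABLE);
 *	...
 *	(void) madvise(chunk, chunk_size, MADV_FREE_REUSE);
 *
 * MADV_FREE_REUSABLE lets the pages be reclaimed without being counted
 * against the task's resident size; MADV_FREE_REUSE is issued before
 * the range is handed out again.
 */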
10990
10991
10992 /*
10993 * Routine: vm_map_entry_insert
10994 *
10995 * Description: This routine inserts a new vm_map_entry in a locked map.
10996 */
10997 vm_map_entry_t
10998 vm_map_entry_insert(
10999 vm_map_t map,
11000 vm_map_entry_t insp_entry,
11001 vm_map_offset_t start,
11002 vm_map_offset_t end,
11003 vm_object_t object,
11004 vm_object_offset_t offset,
11005 boolean_t needs_copy,
11006 boolean_t is_shared,
11007 boolean_t in_transition,
11008 vm_prot_t cur_protection,
11009 vm_prot_t max_protection,
11010 vm_behavior_t behavior,
11011 vm_inherit_t inheritance,
11012 unsigned wired_count,
11013 boolean_t no_cache,
11014 boolean_t permanent,
11015 unsigned int superpage_size)
11016 {
11017 vm_map_entry_t new_entry;
11018
11019 assert(insp_entry != (vm_map_entry_t)0);
11020
11021 new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
11022
11023 new_entry->vme_start = start;
11024 new_entry->vme_end = end;
11025 assert(page_aligned(new_entry->vme_start));
11026 assert(page_aligned(new_entry->vme_end));
11027 assert(new_entry->vme_start < new_entry->vme_end);
11028
11029 new_entry->object.vm_object = object;
11030 new_entry->offset = offset;
11031 new_entry->is_shared = is_shared;
11032 new_entry->is_sub_map = FALSE;
11033 new_entry->needs_copy = needs_copy;
11034 new_entry->in_transition = in_transition;
11035 new_entry->needs_wakeup = FALSE;
11036 new_entry->inheritance = inheritance;
11037 new_entry->protection = cur_protection;
11038 new_entry->max_protection = max_protection;
11039 new_entry->behavior = behavior;
11040 new_entry->wired_count = wired_count;
11041 new_entry->user_wired_count = 0;
11042 new_entry->use_pmap = FALSE;
11043 new_entry->alias = 0;
11044 new_entry->zero_wired_pages = FALSE;
11045 new_entry->no_cache = no_cache;
11046 new_entry->permanent = permanent;
11047 new_entry->superpage_size = superpage_size;
11048 new_entry->used_for_jit = FALSE;
11049
11050 /*
11051 * Insert the new entry into the list.
11052 */
11053
11054 vm_map_store_entry_link(map, insp_entry, new_entry);
11055 map->size += end - start;
11056
11057 /*
11058 * Update the free space hint and the lookup hint.
11059 */
11060
11061 SAVE_HINT_MAP_WRITE(map, new_entry);
11062 return new_entry;
11063 }
11064
11065 /*
11066 * Routine: vm_map_remap_extract
11067 *
11068 * Description: This routine returns a vm_map_entry list from a map.
11069 */
11070 static kern_return_t
11071 vm_map_remap_extract(
11072 vm_map_t map,
11073 vm_map_offset_t addr,
11074 vm_map_size_t size,
11075 boolean_t copy,
11076 struct vm_map_header *map_header,
11077 vm_prot_t *cur_protection,
11078 vm_prot_t *max_protection,
11079 /* What, no behavior? */
11080 vm_inherit_t inheritance,
11081 boolean_t pageable)
11082 {
11083 kern_return_t result;
11084 vm_map_size_t mapped_size;
11085 vm_map_size_t tmp_size;
11086 vm_map_entry_t src_entry; /* result of last map lookup */
11087 vm_map_entry_t new_entry;
11088 vm_object_offset_t offset;
11089 vm_map_offset_t map_address;
11090 vm_map_offset_t src_start; /* start of entry to map */
11091 vm_map_offset_t src_end; /* end of region to be mapped */
11092 vm_object_t object;
11093 vm_map_version_t version;
11094 boolean_t src_needs_copy;
11095 boolean_t new_entry_needs_copy;
11096
11097 assert(map != VM_MAP_NULL);
11098 assert(size != 0 && size == vm_map_round_page(size));
11099 assert(inheritance == VM_INHERIT_NONE ||
11100 inheritance == VM_INHERIT_COPY ||
11101 inheritance == VM_INHERIT_SHARE);
11102
11103 /*
11104 * Compute start and end of region.
11105 */
11106 src_start = vm_map_trunc_page(addr);
11107 src_end = vm_map_round_page(src_start + size);
11108
11109 /*
11110 * Initialize map_header.
11111 */
11112 map_header->links.next = (struct vm_map_entry *)&map_header->links;
11113 map_header->links.prev = (struct vm_map_entry *)&map_header->links;
11114 map_header->nentries = 0;
11115 map_header->entries_pageable = pageable;
11116
11117 vm_map_store_init( map_header );
11118
11119 *cur_protection = VM_PROT_ALL;
11120 *max_protection = VM_PROT_ALL;
11121
11122 map_address = 0;
11123 mapped_size = 0;
11124 result = KERN_SUCCESS;
11125
11126 /*
11127 * The specified source virtual space might correspond to
11128 * multiple map entries, need to loop on them.
11129 */
11130 vm_map_lock(map);
11131 while (mapped_size != size) {
11132 vm_map_size_t entry_size;
11133
11134 /*
11135 * Find the beginning of the region.
11136 */
11137 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
11138 result = KERN_INVALID_ADDRESS;
11139 break;
11140 }
11141
11142 if (src_start < src_entry->vme_start ||
11143 (mapped_size && src_start != src_entry->vme_start)) {
11144 result = KERN_INVALID_ADDRESS;
11145 break;
11146 }
11147
11148 tmp_size = size - mapped_size;
11149 if (src_end > src_entry->vme_end)
11150 tmp_size -= (src_end - src_entry->vme_end);
11151
11152 entry_size = (vm_map_size_t)(src_entry->vme_end -
11153 src_entry->vme_start);
11154
11155 if(src_entry->is_sub_map) {
11156 vm_map_reference(src_entry->object.sub_map);
11157 object = VM_OBJECT_NULL;
11158 } else {
11159 object = src_entry->object.vm_object;
11160
11161 if (object == VM_OBJECT_NULL) {
11162 object = vm_object_allocate(entry_size);
11163 src_entry->offset = 0;
11164 src_entry->object.vm_object = object;
11165 } else if (object->copy_strategy !=
11166 MEMORY_OBJECT_COPY_SYMMETRIC) {
11167 /*
11168 * We are already using an asymmetric
11169 * copy, and therefore we already have
11170 * the right object.
11171 */
11172 assert(!src_entry->needs_copy);
11173 } else if (src_entry->needs_copy || object->shadowed ||
11174 (object->internal && !object->true_share &&
11175 !src_entry->is_shared &&
11176 object->vo_size > entry_size)) {
11177
11178 vm_object_shadow(&src_entry->object.vm_object,
11179 &src_entry->offset,
11180 entry_size);
11181
11182 if (!src_entry->needs_copy &&
11183 (src_entry->protection & VM_PROT_WRITE)) {
11184 vm_prot_t prot;
11185
11186 prot = src_entry->protection & ~VM_PROT_WRITE;
11187
11188 if (override_nx(map, src_entry->alias) && prot)
11189 prot |= VM_PROT_EXECUTE;
11190
11191 if(map->mapped_in_other_pmaps) {
11192 vm_object_pmap_protect(
11193 src_entry->object.vm_object,
11194 src_entry->offset,
11195 entry_size,
11196 PMAP_NULL,
11197 src_entry->vme_start,
11198 prot);
11199 } else {
11200 pmap_protect(vm_map_pmap(map),
11201 src_entry->vme_start,
11202 src_entry->vme_end,
11203 prot);
11204 }
11205 }
11206
11207 object = src_entry->object.vm_object;
11208 src_entry->needs_copy = FALSE;
11209 }
11210
11211
11212 vm_object_lock(object);
11213 vm_object_reference_locked(object); /* object ref. for new entry */
11214 if (object->copy_strategy ==
11215 MEMORY_OBJECT_COPY_SYMMETRIC) {
11216 object->copy_strategy =
11217 MEMORY_OBJECT_COPY_DELAY;
11218 }
11219 vm_object_unlock(object);
11220 }
11221
11222 offset = src_entry->offset + (src_start - src_entry->vme_start);
11223
11224 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
11225 vm_map_entry_copy(new_entry, src_entry);
11226 new_entry->use_pmap = FALSE; /* clr address space specifics */
11227
11228 new_entry->vme_start = map_address;
11229 new_entry->vme_end = map_address + tmp_size;
11230 assert(new_entry->vme_start < new_entry->vme_end);
11231 new_entry->inheritance = inheritance;
11232 new_entry->offset = offset;
11233
11234 /*
11235 * The new region has to be copied now if required.
11236 */
11237 RestartCopy:
11238 if (!copy) {
11239 /*
11240 * Cannot allow an entry describing a JIT
11241 * region to be shared across address spaces.
11242 */
11243 if (src_entry->used_for_jit == TRUE) {
11244 result = KERN_INVALID_ARGUMENT;
11245 break;
11246 }
11247 src_entry->is_shared = TRUE;
11248 new_entry->is_shared = TRUE;
11249 if (!(new_entry->is_sub_map))
11250 new_entry->needs_copy = FALSE;
11251
11252 } else if (src_entry->is_sub_map) {
11253 /* make this a COW sub_map if not already */
11254 new_entry->needs_copy = TRUE;
11255 object = VM_OBJECT_NULL;
11256 } else if (src_entry->wired_count == 0 &&
11257 vm_object_copy_quickly(&new_entry->object.vm_object,
11258 new_entry->offset,
11259 (new_entry->vme_end -
11260 new_entry->vme_start),
11261 &src_needs_copy,
11262 &new_entry_needs_copy)) {
11263
11264 new_entry->needs_copy = new_entry_needs_copy;
11265 new_entry->is_shared = FALSE;
11266
11267 /*
11268 * Handle copy_on_write semantics.
11269 */
11270 if (src_needs_copy && !src_entry->needs_copy) {
11271 vm_prot_t prot;
11272
11273 prot = src_entry->protection & ~VM_PROT_WRITE;
11274
11275 if (override_nx(map, src_entry->alias) && prot)
11276 prot |= VM_PROT_EXECUTE;
11277
11278 vm_object_pmap_protect(object,
11279 offset,
11280 entry_size,
11281 ((src_entry->is_shared
11282 || map->mapped_in_other_pmaps) ?
11283 PMAP_NULL : map->pmap),
11284 src_entry->vme_start,
11285 prot);
11286
11287 src_entry->needs_copy = TRUE;
11288 }
11289 /*
11290 * Throw away the old object reference of the new entry.
11291 */
11292 vm_object_deallocate(object);
11293
11294 } else {
11295 new_entry->is_shared = FALSE;
11296
11297 /*
11298 * The map can be safely unlocked since we
11299 * already hold a reference on the object.
11300 *
11301 * Record the timestamp of the map for later
11302 * verification, and unlock the map.
11303 */
11304 version.main_timestamp = map->timestamp;
11305 vm_map_unlock(map); /* Increments timestamp once! */
11306
11307 /*
11308 * Perform the copy.
11309 */
11310 if (src_entry->wired_count > 0) {
11311 vm_object_lock(object);
11312 result = vm_object_copy_slowly(
11313 object,
11314 offset,
11315 entry_size,
11316 THREAD_UNINT,
11317 &new_entry->object.vm_object);
11318
11319 new_entry->offset = 0;
11320 new_entry->needs_copy = FALSE;
11321 } else {
11322 result = vm_object_copy_strategically(
11323 object,
11324 offset,
11325 entry_size,
11326 &new_entry->object.vm_object,
11327 &new_entry->offset,
11328 &new_entry_needs_copy);
11329
11330 new_entry->needs_copy = new_entry_needs_copy;
11331 }
11332
11333 /*
11334 * Throw away the old object reference of the new entry.
11335 */
11336 vm_object_deallocate(object);
11337
11338 if (result != KERN_SUCCESS &&
11339 result != KERN_MEMORY_RESTART_COPY) {
11340 _vm_map_entry_dispose(map_header, new_entry);
11341 break;
11342 }
11343
11344 /*
11345 * Verify that the map has not substantially
11346 * changed while the copy was being made.
11347 */
11348
11349 vm_map_lock(map);
11350 if (version.main_timestamp + 1 != map->timestamp) {
11351 /*
11352 * Simple version comparison failed.
11353 *
11354 * Retry the lookup and verify that the
11355 * same object/offset are still present.
11356 */
11357 vm_object_deallocate(new_entry->
11358 object.vm_object);
11359 _vm_map_entry_dispose(map_header, new_entry);
11360 if (result == KERN_MEMORY_RESTART_COPY)
11361 result = KERN_SUCCESS;
11362 continue;
11363 }
11364
11365 if (result == KERN_MEMORY_RESTART_COPY) {
11366 vm_object_reference(object);
11367 goto RestartCopy;
11368 }
11369 }
11370
11371 _vm_map_store_entry_link(map_header,
11372 map_header->links.prev, new_entry);
11373
11374 /* Protections for submap mapping are irrelevant here */
11375 if( !src_entry->is_sub_map ) {
11376 *cur_protection &= src_entry->protection;
11377 *max_protection &= src_entry->max_protection;
11378 }
11379 map_address += tmp_size;
11380 mapped_size += tmp_size;
11381 src_start += tmp_size;
11382
11383 } /* end while */
11384
11385 vm_map_unlock(map);
11386 if (result != KERN_SUCCESS) {
11387 /*
11388 * Free all allocated elements.
11389 */
11390 for (src_entry = map_header->links.next;
11391 src_entry != (struct vm_map_entry *)&map_header->links;
11392 src_entry = new_entry) {
11393 new_entry = src_entry->vme_next;
11394 _vm_map_store_entry_unlink(map_header, src_entry);
11395 vm_object_deallocate(src_entry->object.vm_object);
11396 _vm_map_entry_dispose(map_header, src_entry);
11397 }
11398 }
11399 return result;
11400 }
11401
11402 /*
11403 * Routine: vm_remap
11404 *
11405 * Map portion of a task's address space.
11406 * Mapped region must not overlap more than
11407 * one vm memory object. Protections and
11408 * inheritance attributes remain the same
11409 * as in the original task and are out parameters.
11410 * Source and target task can be identical.
11411 * Other attributes are identical as for vm_map().
11412 */
11413 kern_return_t
11414 vm_map_remap(
11415 vm_map_t target_map,
11416 vm_map_address_t *address,
11417 vm_map_size_t size,
11418 vm_map_offset_t mask,
11419 int flags,
11420 vm_map_t src_map,
11421 vm_map_offset_t memory_address,
11422 boolean_t copy,
11423 vm_prot_t *cur_protection,
11424 vm_prot_t *max_protection,
11425 vm_inherit_t inheritance)
11426 {
11427 kern_return_t result;
11428 vm_map_entry_t entry;
11429 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
11430 vm_map_entry_t new_entry;
11431 struct vm_map_header map_header;
11432
11433 if (target_map == VM_MAP_NULL)
11434 return KERN_INVALID_ARGUMENT;
11435
11436 switch (inheritance) {
11437 case VM_INHERIT_NONE:
11438 case VM_INHERIT_COPY:
11439 case VM_INHERIT_SHARE:
11440 if (size != 0 && src_map != VM_MAP_NULL)
11441 break;
11442 /*FALL THRU*/
11443 default:
11444 return KERN_INVALID_ARGUMENT;
11445 }
11446
11447 size = vm_map_round_page(size);
11448
11449 result = vm_map_remap_extract(src_map, memory_address,
11450 size, copy, &map_header,
11451 cur_protection,
11452 max_protection,
11453 inheritance,
11454 target_map->hdr.
11455 entries_pageable);
11456
11457 if (result != KERN_SUCCESS) {
11458 return result;
11459 }
11460
11461 /*
11462 * Allocate/check a range of free virtual address
11463 * space for the target
11464 */
11465 *address = vm_map_trunc_page(*address);
11466 vm_map_lock(target_map);
11467 result = vm_map_remap_range_allocate(target_map, address, size,
11468 mask, flags, &insp_entry);
11469
11470 for (entry = map_header.links.next;
11471 entry != (struct vm_map_entry *)&map_header.links;
11472 entry = new_entry) {
11473 new_entry = entry->vme_next;
11474 _vm_map_store_entry_unlink(&map_header, entry);
11475 if (result == KERN_SUCCESS) {
11476 entry->vme_start += *address;
11477 entry->vme_end += *address;
11478 vm_map_store_entry_link(target_map, insp_entry, entry);
11479 insp_entry = entry;
11480 } else {
11481 if (!entry->is_sub_map) {
11482 vm_object_deallocate(entry->object.vm_object);
11483 } else {
11484 vm_map_deallocate(entry->object.sub_map);
11485 }
11486 _vm_map_entry_dispose(&map_header, entry);
11487 }
11488 }
11489
11490 if( target_map->disable_vmentry_reuse == TRUE) {
11491 if( target_map->highest_entry_end < insp_entry->vme_end ){
11492 target_map->highest_entry_end = insp_entry->vme_end;
11493 }
11494 }
11495
11496 if (result == KERN_SUCCESS) {
11497 target_map->size += size;
11498 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
11499 }
11500 vm_map_unlock(target_map);
11501
11502 if (result == KERN_SUCCESS && target_map->wiring_required)
11503 result = vm_map_wire(target_map, *address,
11504 *address + size, *cur_protection, TRUE);
11505 return result;
11506 }
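/*
 * Illustrative sketch (editorial addition): a user-space caller can
 * reach vm_map_remap() through mach_vm_remap(), e.g. to create a second
 * mapping of an existing range within its own address space.  "size"
 * and "src_addr" are placeholders for an existing mapping:
 *
 *	mach_vm_address_t	target = 0;
 *	vm_prot_t		cur_prot, max_prot;
 *	kern_return_t		kr;
 *
 *	kr = mach_vm_remap(mach_task_self(), &target, size, 0,
 *			   VM_FLAGS_ANYWHERE,
 *			   mach_task_self(), src_addr,
 *			   FALSE,
 *			   &cur_prot, &max_prot, VM_INHERIT_SHARE);
 *
 * Passing FALSE for "copy" shares the underlying objects rather than
 * copying them; the effective protections come back in cur_prot and
 * max_prot, as described above.
 */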
11507
11508 /*
11509 * Routine: vm_map_remap_range_allocate
11510 *
11511 * Description:
11512 * Allocate a range in the specified virtual address map.
11513 * Returns the address and the map entry just before the allocated
11514 * range.
11515 *
11516 * Map must be locked.
11517 */
11518
11519 static kern_return_t
11520 vm_map_remap_range_allocate(
11521 vm_map_t map,
11522 vm_map_address_t *address, /* IN/OUT */
11523 vm_map_size_t size,
11524 vm_map_offset_t mask,
11525 int flags,
11526 vm_map_entry_t *map_entry) /* OUT */
11527 {
11528 vm_map_entry_t entry;
11529 vm_map_offset_t start;
11530 vm_map_offset_t end;
11531 kern_return_t kr;
11532
11533 StartAgain: ;
11534
11535 start = *address;
11536
11537 if (flags & VM_FLAGS_ANYWHERE)
11538 {
11539 /*
11540 * Calculate the first possible address.
11541 */
11542
11543 if (start < map->min_offset)
11544 start = map->min_offset;
11545 if (start > map->max_offset)
11546 return(KERN_NO_SPACE);
11547
11548 /*
11549 * Look for the first possible address;
11550 * if there's already something at this
11551 * address, we have to start after it.
11552 */
11553
11554 if( map->disable_vmentry_reuse == TRUE) {
11555 VM_MAP_HIGHEST_ENTRY(map, entry, start);
11556 } else {
11557 assert(first_free_is_valid(map));
11558 if (start == map->min_offset) {
11559 if ((entry = map->first_free) != vm_map_to_entry(map))
11560 start = entry->vme_end;
11561 } else {
11562 vm_map_entry_t tmp_entry;
11563 if (vm_map_lookup_entry(map, start, &tmp_entry))
11564 start = tmp_entry->vme_end;
11565 entry = tmp_entry;
11566 }
11567 }
11568
11569 /*
11570 * In any case, the "entry" always precedes
11571 * the proposed new region throughout the
11572 * loop:
11573 */
11574
11575 while (TRUE) {
11576 register vm_map_entry_t next;
11577
11578 /*
11579 * Find the end of the proposed new region.
11580 * Be sure we didn't go beyond the end, or
11581 * wrap around the address.
11582 */
11583
11584 end = ((start + mask) & ~mask);
11585 if (end < start)
11586 return(KERN_NO_SPACE);
11587 start = end;
11588 end += size;
11589
11590 if ((end > map->max_offset) || (end < start)) {
11591 if (map->wait_for_space) {
11592 if (size <= (map->max_offset -
11593 map->min_offset)) {
11594 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
11595 vm_map_unlock(map);
11596 thread_block(THREAD_CONTINUE_NULL);
11597 vm_map_lock(map);
11598 goto StartAgain;
11599 }
11600 }
11601
11602 return(KERN_NO_SPACE);
11603 }
11604
11605 /*
11606 * If there are no more entries, we must win.
11607 */
11608
11609 next = entry->vme_next;
11610 if (next == vm_map_to_entry(map))
11611 break;
11612
11613 /*
11614 * If there is another entry, it must be
11615 * after the end of the potential new region.
11616 */
11617
11618 if (next->vme_start >= end)
11619 break;
11620
11621 /*
11622 * Didn't fit -- move to the next entry.
11623 */
11624
11625 entry = next;
11626 start = entry->vme_end;
11627 }
11628 *address = start;
11629 } else {
11630 vm_map_entry_t temp_entry;
11631
11632 /*
11633 * Verify that:
11634 * the address doesn't itself violate
11635 * the mask requirement.
11636 */
11637
11638 if ((start & mask) != 0)
11639 return(KERN_NO_SPACE);
11640
11641
11642 /*
11643 * ... the address is within bounds
11644 */
11645
11646 end = start + size;
11647
11648 if ((start < map->min_offset) ||
11649 (end > map->max_offset) ||
11650 (start >= end)) {
11651 return(KERN_INVALID_ADDRESS);
11652 }
11653
11654 /*
11655 * If we're asked to overwrite whatever was mapped in that
11656 * range, first deallocate that range.
11657 */
11658 if (flags & VM_FLAGS_OVERWRITE) {
11659 vm_map_t zap_map;
11660
11661 /*
11662 * We use a "zap_map" to avoid having to unlock
11663 * the "map" in vm_map_delete(), which would compromise
11664 * the atomicity of the "deallocate" and then "remap"
11665 * combination.
11666 */
11667 zap_map = vm_map_create(PMAP_NULL,
11668 start,
11669 end,
11670 map->hdr.entries_pageable);
11671 if (zap_map == VM_MAP_NULL) {
11672 return KERN_RESOURCE_SHORTAGE;
11673 }
11674
11675 kr = vm_map_delete(map, start, end,
11676 VM_MAP_REMOVE_SAVE_ENTRIES,
11677 zap_map);
11678 if (kr == KERN_SUCCESS) {
11679 vm_map_destroy(zap_map,
11680 VM_MAP_REMOVE_NO_PMAP_CLEANUP);
11681 zap_map = VM_MAP_NULL;
11682 }
11683 }
11684
11685 /*
11686 * ... the starting address isn't allocated
11687 */
11688
11689 if (vm_map_lookup_entry(map, start, &temp_entry))
11690 return(KERN_NO_SPACE);
11691
11692 entry = temp_entry;
11693
11694 /*
11695 * ... the next region doesn't overlap the
11696 * end point.
11697 */
11698
11699 if ((entry->vme_next != vm_map_to_entry(map)) &&
11700 (entry->vme_next->vme_start < end))
11701 return(KERN_NO_SPACE);
11702 }
11703 *map_entry = entry;
11704 return(KERN_SUCCESS);
11705 }
11706
11707 /*
11708 * vm_map_switch:
11709 *
11710 * Set the address map for the current thread to the specified map
11711 */
11712
11713 vm_map_t
11714 vm_map_switch(
11715 vm_map_t map)
11716 {
11717 int mycpu;
11718 thread_t thread = current_thread();
11719 vm_map_t oldmap = thread->map;
11720
11721 mp_disable_preemption();
11722 mycpu = cpu_number();
11723
11724 /*
11725 * Deactivate the current map and activate the requested map
11726 */
11727 PMAP_SWITCH_USER(thread, map, mycpu);
11728
11729 mp_enable_preemption();
11730 return(oldmap);
11731 }
11732
11733
11734 /*
11735 * Routine: vm_map_write_user
11736 *
11737 * Description:
11738 * Copy out data from a kernel space into space in the
11739 * destination map. The space must already exist in the
11740 * destination map.
11741 * NOTE: This routine should only be called by threads
11742 * which can block on a page fault, i.e. kernel-mode user
11743 * threads.
11744 *
11745 */
11746 kern_return_t
11747 vm_map_write_user(
11748 vm_map_t map,
11749 void *src_p,
11750 vm_map_address_t dst_addr,
11751 vm_size_t size)
11752 {
11753 kern_return_t kr = KERN_SUCCESS;
11754
11755 if(current_map() == map) {
11756 if (copyout(src_p, dst_addr, size)) {
11757 kr = KERN_INVALID_ADDRESS;
11758 }
11759 } else {
11760 vm_map_t oldmap;
11761
11762 /* take on the identity of the target map while doing */
11763 /* the transfer */
11764
11765 vm_map_reference(map);
11766 oldmap = vm_map_switch(map);
11767 if (copyout(src_p, dst_addr, size)) {
11768 kr = KERN_INVALID_ADDRESS;
11769 }
11770 vm_map_switch(oldmap);
11771 vm_map_deallocate(map);
11772 }
11773 return kr;
11774 }
11775
11776 /*
11777 * Routine: vm_map_read_user
11778 *
11779 * Description:
11780 * Copy in data from a user space source map into the
11781 * kernel map. The space must already exist in the
11782 * kernel map.
11783 * NOTE: This routine should only be called by threads
11784 * which can block on a page fault, i.e. kernel-mode user
11785 * threads.
11786 *
11787 */
11788 kern_return_t
11789 vm_map_read_user(
11790 vm_map_t map,
11791 vm_map_address_t src_addr,
11792 void *dst_p,
11793 vm_size_t size)
11794 {
11795 kern_return_t kr = KERN_SUCCESS;
11796
11797 if(current_map() == map) {
11798 if (copyin(src_addr, dst_p, size)) {
11799 kr = KERN_INVALID_ADDRESS;
11800 }
11801 } else {
11802 vm_map_t oldmap;
11803
11804 /* take on the identity of the target map while doing */
11805 /* the transfer */
11806
11807 vm_map_reference(map);
11808 oldmap = vm_map_switch(map);
11809 if (copyin(src_addr, dst_p, size)) {
11810 kr = KERN_INVALID_ADDRESS;
11811 }
11812 vm_map_switch(oldmap);
11813 vm_map_deallocate(map);
11814 }
11815 return kr;
11816 }
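/*
 * Illustrative sketch (editorial addition): a kernel-mode user thread
 * holding a reference to another task's map could move data with the
 * two routines above.  "user_map" and "user_addr" are assumed to be a
 * valid map and a mapped, writable address within it:
 *
 *	char		local[64];
 *	kern_return_t	kr;
 *
 *	kr = vm_map_read_user(user_map, user_addr, local, sizeof (local));
 *	if (kr == KERN_SUCCESS)
 *		kr = vm_map_write_user(user_map, local, user_addr,
 *				       sizeof (local));
 *
 * Both calls may fault and block, which is why they must not be used
 * from contexts that cannot take a page fault.
 */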
11817
11818
11819 /*
11820 * vm_map_check_protection:
11821 *
11822 * Assert that the target map allows the specified
11823 * privilege on the entire address region given.
11824 * The entire region must be allocated.
11825 */
11826 boolean_t
11827 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
11828 vm_map_offset_t end, vm_prot_t protection)
11829 {
11830 vm_map_entry_t entry;
11831 vm_map_entry_t tmp_entry;
11832
11833 vm_map_lock(map);
11834
11835 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
11836 {
11837 vm_map_unlock(map);
11838 return (FALSE);
11839 }
11840
11841 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11842 vm_map_unlock(map);
11843 return(FALSE);
11844 }
11845
11846 entry = tmp_entry;
11847
11848 while (start < end) {
11849 if (entry == vm_map_to_entry(map)) {
11850 vm_map_unlock(map);
11851 return(FALSE);
11852 }
11853
11854 /*
11855 * No holes allowed!
11856 */
11857
11858 if (start < entry->vme_start) {
11859 vm_map_unlock(map);
11860 return(FALSE);
11861 }
11862
11863 /*
11864 * Check protection associated with entry.
11865 */
11866
11867 if ((entry->protection & protection) != protection) {
11868 vm_map_unlock(map);
11869 return(FALSE);
11870 }
11871
11872 /* go to next entry */
11873
11874 start = entry->vme_end;
11875 entry = entry->vme_next;
11876 }
11877 vm_map_unlock(map);
11878 return(TRUE);
11879 }
11880
11881 kern_return_t
11882 vm_map_purgable_control(
11883 vm_map_t map,
11884 vm_map_offset_t address,
11885 vm_purgable_t control,
11886 int *state)
11887 {
11888 vm_map_entry_t entry;
11889 vm_object_t object;
11890 kern_return_t kr;
11891
11892 /*
11893 * Vet all the input parameters and current type and state of the
11894 * underlying object. Return with an error if anything is amiss.
11895 */
11896 if (map == VM_MAP_NULL)
11897 return(KERN_INVALID_ARGUMENT);
11898
11899 if (control != VM_PURGABLE_SET_STATE &&
11900 control != VM_PURGABLE_GET_STATE &&
11901 control != VM_PURGABLE_PURGE_ALL)
11902 return(KERN_INVALID_ARGUMENT);
11903
11904 if (control == VM_PURGABLE_PURGE_ALL) {
11905 vm_purgeable_object_purge_all();
11906 return KERN_SUCCESS;
11907 }
11908
11909 if (control == VM_PURGABLE_SET_STATE &&
11910 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
11911 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
11912 return(KERN_INVALID_ARGUMENT);
11913
11914 vm_map_lock_read(map);
11915
11916 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
11917
11918 /*
11919 * Must pass a valid non-submap address.
11920 */
11921 vm_map_unlock_read(map);
11922 return(KERN_INVALID_ADDRESS);
11923 }
11924
11925 if ((entry->protection & VM_PROT_WRITE) == 0) {
11926 /*
11927 * Can't apply purgable controls to something you can't write.
11928 */
11929 vm_map_unlock_read(map);
11930 return(KERN_PROTECTION_FAILURE);
11931 }
11932
11933 object = entry->object.vm_object;
11934 if (object == VM_OBJECT_NULL) {
11935 /*
11936 * Object must already be present or it can't be purgable.
11937 */
11938 vm_map_unlock_read(map);
11939 return KERN_INVALID_ARGUMENT;
11940 }
11941
11942 vm_object_lock(object);
11943
11944 if (entry->offset != 0 ||
11945 entry->vme_end - entry->vme_start != object->vo_size) {
11946 /*
11947 * Can only apply purgable controls to the whole (existing)
11948 * object at once.
11949 */
11950 vm_map_unlock_read(map);
11951 vm_object_unlock(object);
11952 return KERN_INVALID_ARGUMENT;
11953 }
11954
11955 vm_map_unlock_read(map);
11956
11957 kr = vm_object_purgable_control(object, control, state);
11958
11959 vm_object_unlock(object);
11960
11961 return kr;
11962 }
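/*
 * Illustrative sketch (editorial addition): from user space the usual
 * pattern is to allocate purgeable memory and toggle its state around
 * uses of the cached contents.  A hedged example ("size" is a
 * placeholder):
 *
 *	vm_address_t	buf = 0;
 *	int		state;
 *	kern_return_t	kr;
 *
 *	kr = vm_allocate(mach_task_self(), &buf, size,
 *			 VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE);
 *
 *	state = VM_PURGABLE_VOLATILE;
 *	kr = vm_purgable_control(mach_task_self(), buf,
 *				 VM_PURGABLE_SET_STATE, &state);
 *
 *	state = VM_PURGABLE_NONVOLATILE;
 *	kr = vm_purgable_control(mach_task_self(), buf,
 *				 VM_PURGABLE_SET_STATE, &state);
 *
 * VM_PURGABLE_SET_STATE returns the previous state in *state; if the
 * second call reports VM_PURGABLE_EMPTY, the kernel reclaimed the pages
 * while the object was volatile and the contents must be regenerated.
 */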
11963
11964 kern_return_t
11965 vm_map_page_query_internal(
11966 vm_map_t target_map,
11967 vm_map_offset_t offset,
11968 int *disposition,
11969 int *ref_count)
11970 {
11971 kern_return_t kr;
11972 vm_page_info_basic_data_t info;
11973 mach_msg_type_number_t count;
11974
11975 count = VM_PAGE_INFO_BASIC_COUNT;
11976 kr = vm_map_page_info(target_map,
11977 offset,
11978 VM_PAGE_INFO_BASIC,
11979 (vm_page_info_t) &info,
11980 &count);
11981 if (kr == KERN_SUCCESS) {
11982 *disposition = info.disposition;
11983 *ref_count = info.ref_count;
11984 } else {
11985 *disposition = 0;
11986 *ref_count = 0;
11987 }
11988
11989 return kr;
11990 }
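/*
 * Illustrative sketch (editorial addition): the per-page disposition
 * assembled below is also what mach_vm_page_query() reports to user
 * space.  "page_addr" is a placeholder for a page-aligned address in
 * the caller's map:
 *
 *	integer_t	disposition = 0, ref_count = 0;
 *	kern_return_t	kr;
 *
 *	kr = mach_vm_page_query(mach_task_self(),
 *				(mach_vm_offset_t) page_addr,
 *				&disposition, &ref_count);
 *
 * On success, a set VM_PAGE_QUERY_PAGE_PRESENT bit in "disposition"
 * means the page is resident somewhere in the shadow chain.
 * vm_map_page_query_internal() above is the kernel-side equivalent,
 * layered on vm_map_page_info().
 */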
11991
11992 kern_return_t
11993 vm_map_page_info(
11994 vm_map_t map,
11995 vm_map_offset_t offset,
11996 vm_page_info_flavor_t flavor,
11997 vm_page_info_t info,
11998 mach_msg_type_number_t *count)
11999 {
12000 vm_map_entry_t map_entry;
12001 vm_object_t object;
12002 vm_page_t m;
12003 kern_return_t kr;
12004 kern_return_t retval = KERN_SUCCESS;
12005 boolean_t top_object;
12006 int disposition;
12007 int ref_count;
12008 vm_object_id_t object_id;
12009 vm_page_info_basic_t basic_info;
12010 int depth;
12011 vm_map_offset_t offset_in_page;
12012
12013 switch (flavor) {
12014 case VM_PAGE_INFO_BASIC:
12015 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
12016 /*
12017 * The "vm_page_info_basic_data" structure was not
12018 * properly padded, so allow the size to be off by
12019 * one to maintain backwards binary compatibility...
12020 */
12021 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1)
12022 return KERN_INVALID_ARGUMENT;
12023 }
12024 break;
12025 default:
12026 return KERN_INVALID_ARGUMENT;
12027 }
12028
12029 disposition = 0;
12030 ref_count = 0;
12031 object_id = 0;
12032 top_object = TRUE;
12033 depth = 0;
12034
12035 retval = KERN_SUCCESS;
12036 offset_in_page = offset & PAGE_MASK;
12037 offset = vm_map_trunc_page(offset);
12038
12039 vm_map_lock_read(map);
12040
12041 /*
12042 * First, find the map entry covering "offset", going down
12043 * submaps if necessary.
12044 */
12045 for (;;) {
12046 if (!vm_map_lookup_entry(map, offset, &map_entry)) {
12047 vm_map_unlock_read(map);
12048 return KERN_INVALID_ADDRESS;
12049 }
12050 /* compute offset from this map entry's start */
12051 offset -= map_entry->vme_start;
12052 /* compute offset into this map entry's object (or submap) */
12053 offset += map_entry->offset;
12054
12055 if (map_entry->is_sub_map) {
12056 vm_map_t sub_map;
12057
12058 sub_map = map_entry->object.sub_map;
12059 vm_map_lock_read(sub_map);
12060 vm_map_unlock_read(map);
12061
12062 map = sub_map;
12063
12064 ref_count = MAX(ref_count, map->ref_count);
12065 continue;
12066 }
12067 break;
12068 }
12069
12070 object = map_entry->object.vm_object;
12071 if (object == VM_OBJECT_NULL) {
12072 /* no object -> no page */
12073 vm_map_unlock_read(map);
12074 goto done;
12075 }
12076
12077 vm_object_lock(object);
12078 vm_map_unlock_read(map);
12079
12080 /*
12081 * Go down the VM object shadow chain until we find the page
12082 * we're looking for.
12083 */
12084 for (;;) {
12085 ref_count = MAX(ref_count, object->ref_count);
12086
12087 m = vm_page_lookup(object, offset);
12088
12089 if (m != VM_PAGE_NULL) {
12090 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
12091 break;
12092 } else {
12093 #if MACH_PAGEMAP
12094 if (object->existence_map) {
12095 if (vm_external_state_get(object->existence_map,
12096 offset) ==
12097 VM_EXTERNAL_STATE_EXISTS) {
12098 /*
12099 * this page has been paged out
12100 */
12101 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
12102 break;
12103 }
12104 } else
12105 #endif
12106 {
12107 if (object->internal &&
12108 object->alive &&
12109 !object->terminating &&
12110 object->pager_ready) {
12111
12112 memory_object_t pager;
12113
12114 vm_object_paging_begin(object);
12115 pager = object->pager;
12116 vm_object_unlock(object);
12117
12118 /*
12119 * Ask the default pager if
12120 * it has this page.
12121 */
12122 kr = memory_object_data_request(
12123 pager,
12124 offset + object->paging_offset,
12125 0, /* just poke the pager */
12126 VM_PROT_READ,
12127 NULL);
12128
12129 vm_object_lock(object);
12130 vm_object_paging_end(object);
12131
12132 if (kr == KERN_SUCCESS) {
12133 /* the default pager has it */
12134 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
12135 break;
12136 }
12137 }
12138 }
12139
12140 if (object->shadow != VM_OBJECT_NULL) {
12141 vm_object_t shadow;
12142
12143 offset += object->vo_shadow_offset;
12144 shadow = object->shadow;
12145
12146 vm_object_lock(shadow);
12147 vm_object_unlock(object);
12148
12149 object = shadow;
12150 top_object = FALSE;
12151 depth++;
12152 } else {
12153 // if (!object->internal)
12154 // break;
12155 // retval = KERN_FAILURE;
12156 // goto done_with_object;
12157 break;
12158 }
12159 }
12160 }
12161 /* The ref_count is not strictly accurate: it measures the number */
12162 /* of entities holding a ref on the object; they may not be mapping */
12163 /* the object, or may not be mapping the section holding the */
12164 /* target page, but it's still a ballpark figure and, though an */
12165 /* over-count, it picks up the copy-on-write cases. */
12166
12167 /* We could also get a picture of page sharing from pmap_attributes */
12168 /* but this would undercount, as only faulted-in mappings would */
12169 /* show up. */
12170
12171 if (top_object == TRUE && object->shadow)
12172 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
12173
12174 if (! object->internal)
12175 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
12176
12177 if (m == VM_PAGE_NULL)
12178 goto done_with_object;
12179
12180 if (m->fictitious) {
12181 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
12182 goto done_with_object;
12183 }
12184 if (m->dirty || pmap_is_modified(m->phys_page))
12185 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
12186
12187 if (m->reference || pmap_is_referenced(m->phys_page))
12188 disposition |= VM_PAGE_QUERY_PAGE_REF;
12189
12190 if (m->speculative)
12191 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
12192
12193 if (m->cs_validated)
12194 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
12195 if (m->cs_tainted)
12196 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
12197
12198 done_with_object:
12199 vm_object_unlock(object);
12200 done:
12201
12202 switch (flavor) {
12203 case VM_PAGE_INFO_BASIC:
12204 basic_info = (vm_page_info_basic_t) info;
12205 basic_info->disposition = disposition;
12206 basic_info->ref_count = ref_count;
12207 basic_info->object_id = (vm_object_id_t) (uintptr_t) object;
12208 basic_info->offset =
12209 (memory_object_offset_t) offset + offset_in_page;
12210 basic_info->depth = depth;
12211 break;
12212 }
12213
12214 return retval;
12215 }
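
/*
 * Illustrative user-level sketch (not part of this file): the same
 * "is this page resident?" question, asked through the POSIX mincore(2)
 * interface, which on this platform is ultimately served by the page-query
 * machinery above. Function name and sizes are assumptions for the sketch
 * only.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>

int
residency_demo(void)
{
	size_t	pagesize = (size_t)getpagesize();
	size_t	npages = 16;
	size_t	len = npages * pagesize;
	size_t	i;
	char	*vec;
	void	*buf;

	buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
		   MAP_ANON | MAP_PRIVATE, -1, 0);
	if (buf == MAP_FAILED)
		return -1;

	/* Touch only the first page so exactly one page becomes resident. */
	memset(buf, 0xA5, pagesize);

	vec = malloc(npages);
	if (vec != NULL && mincore(buf, len, vec) == 0) {
		for (i = 0; i < npages; i++)
			printf("page %zu: %s\n", i,
			    (vec[i] & MINCORE_INCORE) ? "resident" : "not resident");
	}
	free(vec);
	munmap(buf, len);
	return 0;
}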
12216
12217 /*
12218 * vm_map_msync
12219 *
12220 * Synchronises the specified memory range with its backing store
12221 * image by either flushing or cleaning the contents to the appropriate
12222 * memory manager, engaging in a memory object synchronize dialog with
12223 * that manager. The client does not return until the manager issues
12224 * an m_o_s_completed message. MIG magically converts the user task
12225 * parameter to the task's address map.
12226 *
12227 * interpretation of sync_flags
12228 * VM_SYNC_INVALIDATE - discard pages, only return precious
12229 * pages to manager.
12230 *
12231 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
12232 * - discard pages, write dirty or precious
12233 * pages back to memory manager.
12234 *
12235 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
12236 * - write dirty or precious pages back to
12237 * the memory manager.
12238 *
12239 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
12240 * is a hole in the region, and we would
12241 * have returned KERN_SUCCESS, return
12242 * KERN_INVALID_ADDRESS instead.
12243 *
12244 * NOTE
12245 * The memory object attributes have not yet been implemented; this
12246 * function will have to deal with the invalidate attribute.
12247 *
12248 * RETURNS
12249 * KERN_INVALID_TASK Bad task parameter
12250 * KERN_INVALID_ARGUMENT both sync and async were specified.
12251 * KERN_SUCCESS The usual.
12252 * KERN_INVALID_ADDRESS There was a hole in the region.
12253 */
12254
12255 kern_return_t
12256 vm_map_msync(
12257 vm_map_t map,
12258 vm_map_address_t address,
12259 vm_map_size_t size,
12260 vm_sync_t sync_flags)
12261 {
12262 msync_req_t msr;
12263 msync_req_t new_msr;
12264 queue_chain_t req_q; /* queue of requests for this msync */
12265 vm_map_entry_t entry;
12266 vm_map_size_t amount_left;
12267 vm_object_offset_t offset;
12268 boolean_t do_sync_req;
12269 boolean_t had_hole = FALSE;
12270 memory_object_t pager;
12271
12272 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
12273 (sync_flags & VM_SYNC_SYNCHRONOUS))
12274 return(KERN_INVALID_ARGUMENT);
12275
12276 /*
12277 * align address and size on page boundaries
12278 */
12279 size = vm_map_round_page(address + size) - vm_map_trunc_page(address);
12280 address = vm_map_trunc_page(address);
12281
12282 if (map == VM_MAP_NULL)
12283 return(KERN_INVALID_TASK);
12284
12285 if (size == 0)
12286 return(KERN_SUCCESS);
12287
12288 queue_init(&req_q);
12289 amount_left = size;
12290
12291 while (amount_left > 0) {
12292 vm_object_size_t flush_size;
12293 vm_object_t object;
12294
12295 vm_map_lock(map);
12296 if (!vm_map_lookup_entry(map,
12297 vm_map_trunc_page(address), &entry)) {
12298
12299 vm_map_size_t skip;
12300
12301 /*
12302 * hole in the address map.
12303 */
12304 had_hole = TRUE;
12305
12306 /*
12307 * Check for empty map.
12308 */
12309 if (entry == vm_map_to_entry(map) &&
12310 entry->vme_next == entry) {
12311 vm_map_unlock(map);
12312 break;
12313 }
12314 /*
12315 * Check that we don't wrap and that
12316 * we have at least one real map entry.
12317 */
12318 if ((map->hdr.nentries == 0) ||
12319 (entry->vme_next->vme_start < address)) {
12320 vm_map_unlock(map);
12321 break;
12322 }
12323 /*
12324 * Move up to the next entry if needed
12325 */
12326 skip = (entry->vme_next->vme_start - address);
12327 if (skip >= amount_left)
12328 amount_left = 0;
12329 else
12330 amount_left -= skip;
12331 address = entry->vme_next->vme_start;
12332 vm_map_unlock(map);
12333 continue;
12334 }
12335
12336 offset = address - entry->vme_start;
12337
12338 /*
12339 * do we have more to flush than is contained in this
12340 * entry ?
12341 */
12342 if (amount_left + entry->vme_start + offset > entry->vme_end) {
12343 flush_size = entry->vme_end -
12344 (entry->vme_start + offset);
12345 } else {
12346 flush_size = amount_left;
12347 }
12348 amount_left -= flush_size;
12349 address += flush_size;
12350
12351 if (entry->is_sub_map == TRUE) {
12352 vm_map_t local_map;
12353 vm_map_offset_t local_offset;
12354
12355 local_map = entry->object.sub_map;
12356 local_offset = entry->offset;
12357 vm_map_unlock(map);
12358 if (vm_map_msync(
12359 local_map,
12360 local_offset,
12361 flush_size,
12362 sync_flags) == KERN_INVALID_ADDRESS) {
12363 had_hole = TRUE;
12364 }
12365 continue;
12366 }
12367 object = entry->object.vm_object;
12368
12369 /*
12370 * We can't sync this object if the object has not been
12371 * created yet
12372 */
12373 if (object == VM_OBJECT_NULL) {
12374 vm_map_unlock(map);
12375 continue;
12376 }
12377 offset += entry->offset;
12378
12379 vm_object_lock(object);
12380
12381 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
12382 int kill_pages = 0;
12383 boolean_t reusable_pages = FALSE;
12384
12385 if (sync_flags & VM_SYNC_KILLPAGES) {
12386 if (object->ref_count == 1 && !object->shadow)
12387 kill_pages = 1;
12388 else
12389 kill_pages = -1;
12390 }
12391 if (kill_pages != -1)
12392 vm_object_deactivate_pages(object, offset,
12393 (vm_object_size_t)flush_size, kill_pages, reusable_pages);
12394 vm_object_unlock(object);
12395 vm_map_unlock(map);
12396 continue;
12397 }
12398 /*
12399 * We can't sync this object if there isn't a pager.
12400 * Don't bother to sync internal objects, since there can't
12401 * be any "permanent" storage for these objects anyway.
12402 */
12403 if ((object->pager == MEMORY_OBJECT_NULL) ||
12404 (object->internal) || (object->private)) {
12405 vm_object_unlock(object);
12406 vm_map_unlock(map);
12407 continue;
12408 }
12409 /*
12410 * keep a reference on the object until syncing is done
12411 */
12412 vm_object_reference_locked(object);
12413 vm_object_unlock(object);
12414
12415 vm_map_unlock(map);
12416
12417 do_sync_req = vm_object_sync(object,
12418 offset,
12419 flush_size,
12420 sync_flags & VM_SYNC_INVALIDATE,
12421 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
12422 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
12423 sync_flags & VM_SYNC_SYNCHRONOUS);
12424 /*
12425 * only send an m_o_s if we returned pages or if the entry
12426 * is writable (i.e. dirty pages may have already been sent back)
12427 */
12428 if (!do_sync_req) {
12429 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
12430 /*
12431 * clear out the clustering and read-ahead hints
12432 */
12433 vm_object_lock(object);
12434
12435 object->pages_created = 0;
12436 object->pages_used = 0;
12437 object->sequential = 0;
12438 object->last_alloc = 0;
12439
12440 vm_object_unlock(object);
12441 }
12442 vm_object_deallocate(object);
12443 continue;
12444 }
12445 msync_req_alloc(new_msr);
12446
12447 vm_object_lock(object);
12448 offset += object->paging_offset;
12449
12450 new_msr->offset = offset;
12451 new_msr->length = flush_size;
12452 new_msr->object = object;
12453 new_msr->flag = VM_MSYNC_SYNCHRONIZING;
12454 re_iterate:
12455
12456 /*
12457 * We can't sync this object if there isn't a pager. The
12458 * pager can disappear anytime we're not holding the object
12459 * lock. So this has to be checked anytime we goto re_iterate.
12460 */
12461
12462 pager = object->pager;
12463
12464 if (pager == MEMORY_OBJECT_NULL) {
12465 vm_object_unlock(object);
12466 vm_object_deallocate(object);
12467 continue;
12468 }
12469
12470 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
12471 /*
12472 * need to check for an overlapping entry; if found, wait
12473 * for the overlapping msr to be done, then reiterate
12474 */
12475 msr_lock(msr);
12476 if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
12477 ((offset >= msr->offset &&
12478 offset < (msr->offset + msr->length)) ||
12479 (msr->offset >= offset &&
12480 msr->offset < (offset + flush_size))))
12481 {
12482 assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
12483 msr_unlock(msr);
12484 vm_object_unlock(object);
12485 thread_block(THREAD_CONTINUE_NULL);
12486 vm_object_lock(object);
12487 goto re_iterate;
12488 }
12489 msr_unlock(msr);
12490 }/* queue_iterate */
12491
12492 queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
12493
12494 vm_object_paging_begin(object);
12495 vm_object_unlock(object);
12496
12497 queue_enter(&req_q, new_msr, msync_req_t, req_q);
12498
12499 (void) memory_object_synchronize(
12500 pager,
12501 offset,
12502 flush_size,
12503 sync_flags & ~VM_SYNC_CONTIGUOUS);
12504
12505 vm_object_lock(object);
12506 vm_object_paging_end(object);
12507 vm_object_unlock(object);
12508 }/* while */
12509
12510 /*
12511 * wait for memory_object_synchronize_completed messages from pager(s)
12512 */
12513
12514 while (!queue_empty(&req_q)) {
12515 msr = (msync_req_t)queue_first(&req_q);
12516 msr_lock(msr);
12517 while(msr->flag != VM_MSYNC_DONE) {
12518 assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
12519 msr_unlock(msr);
12520 thread_block(THREAD_CONTINUE_NULL);
12521 msr_lock(msr);
12522 }/* while */
12523 queue_remove(&req_q, msr, msync_req_t, req_q);
12524 msr_unlock(msr);
12525 vm_object_deallocate(msr->object);
12526 msync_req_free(msr);
12527 }/* queue_iterate */
12528
12529 /* for proper msync() behaviour */
12530 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
12531 return(KERN_INVALID_ADDRESS);
12532
12533 return(KERN_SUCCESS);
12534 }/* vm_msync */
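
/*
 * Illustrative user-level sketch (not part of this file): how the sync_flags
 * handled above surface through msync(2) -- MS_SYNC and MS_ASYNC select the
 * synchronous vs. asynchronous write-back paths, and MS_INVALIDATE maps to
 * VM_SYNC_INVALIDATE. File name and sizes are assumptions for the sketch;
 * error handling is abbreviated.
 */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>

int
msync_demo(const char *path)
{
	size_t	len = 4096;
	char	*p;
	int	fd;

	fd = open(path, O_RDWR | O_CREAT, 0644);
	if (fd < 0)
		return -1;
	if (ftruncate(fd, (off_t)len) != 0) {
		close(fd);
		return -1;
	}

	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED) {
		close(fd);
		return -1;
	}
	memcpy(p, "hello, backing store", 21);

	/* Write dirty pages back and wait for completion (synchronous). */
	msync(p, len, MS_SYNC);

	/* Queue another write-back without waiting (asynchronous). */
	msync(p, len, MS_ASYNC);

	/* Discard cached pages; the next access refaults from the file. */
	msync(p, len, MS_INVALIDATE);

	munmap(p, len);
	close(fd);
	return 0;
}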
12535
12536 /*
12537 * Routine: convert_port_entry_to_map
12538 * Purpose:
12539 * Convert from a port specifying an entry or a task
12540 * to a map. Doesn't consume the port ref; produces a map ref,
12541 * which may be null. Unlike convert_port_to_map, the
12542 * port may be either a task port or a named-entry port.
12543 * Conditions:
12544 * Nothing locked.
12545 */
12546
12547
12548 vm_map_t
12549 convert_port_entry_to_map(
12550 ipc_port_t port)
12551 {
12552 vm_map_t map;
12553 vm_named_entry_t named_entry;
12554 uint32_t try_failed_count = 0;
12555
12556 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
12557 while(TRUE) {
12558 ip_lock(port);
12559 if(ip_active(port) && (ip_kotype(port)
12560 == IKOT_NAMED_ENTRY)) {
12561 named_entry =
12562 (vm_named_entry_t)port->ip_kobject;
12563 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
12564 ip_unlock(port);
12565
12566 try_failed_count++;
12567 mutex_pause(try_failed_count);
12568 continue;
12569 }
12570 named_entry->ref_count++;
12571 lck_mtx_unlock(&(named_entry)->Lock);
12572 ip_unlock(port);
12573 if ((named_entry->is_sub_map) &&
12574 (named_entry->protection
12575 & VM_PROT_WRITE)) {
12576 map = named_entry->backing.map;
12577 } else {
12578 mach_destroy_memory_entry(port);
12579 return VM_MAP_NULL;
12580 }
12581 vm_map_reference_swap(map);
12582 mach_destroy_memory_entry(port);
12583 break;
12584 }
12585 else
12586 return VM_MAP_NULL;
12587 }
12588 }
12589 else
12590 map = convert_port_to_map(port);
12591
12592 return map;
12593 }
12594
12595 /*
12596 * Routine: convert_port_entry_to_object
12597 * Purpose:
12598 * Convert from a port specifying a named entry to an
12599 * object. Doesn't consume the port ref; produces an object ref,
12600 * which may be null.
12601 * Conditions:
12602 * Nothing locked.
12603 */
12604
12605
12606 vm_object_t
12607 convert_port_entry_to_object(
12608 ipc_port_t port)
12609 {
12610 vm_object_t object;
12611 vm_named_entry_t named_entry;
12612 uint32_t try_failed_count = 0;
12613
12614 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
12615 while(TRUE) {
12616 ip_lock(port);
12617 if(ip_active(port) && (ip_kotype(port)
12618 == IKOT_NAMED_ENTRY)) {
12619 named_entry =
12620 (vm_named_entry_t)port->ip_kobject;
12621 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
12622 ip_unlock(port);
12623
12624 try_failed_count++;
12625 mutex_pause(try_failed_count);
12626 continue;
12627 }
12628 named_entry->ref_count++;
12629 lck_mtx_unlock(&(named_entry)->Lock);
12630 ip_unlock(port);
12631 if ((!named_entry->is_sub_map) &&
12632 (!named_entry->is_pager) &&
12633 (named_entry->protection
12634 & VM_PROT_WRITE)) {
12635 object = named_entry->backing.object;
12636 } else {
12637 mach_destroy_memory_entry(port);
12638 return (vm_object_t)NULL;
12639 }
12640 vm_object_reference(named_entry->backing.object);
12641 mach_destroy_memory_entry(port);
12642 break;
12643 }
12644 else
12645 return (vm_object_t)NULL;
12646 }
12647 } else {
12648 return (vm_object_t)NULL;
12649 }
12650
12651 return object;
12652 }
12653
12654 /*
12655 * Export routines to other components for the things we access locally through
12656 * macros.
12657 */
12658 #undef current_map
12659 vm_map_t
12660 current_map(void)
12661 {
12662 return (current_map_fast());
12663 }
12664
12665 /*
12666 * vm_map_reference:
12667 *
12668 * Most code internal to the osfmk will go through a
12669 * macro defining this. This is always here for the
12670 * use of other kernel components.
12671 */
12672 #undef vm_map_reference
12673 void
12674 vm_map_reference(
12675 register vm_map_t map)
12676 {
12677 if (map == VM_MAP_NULL)
12678 return;
12679
12680 lck_mtx_lock(&map->s_lock);
12681 #if TASK_SWAPPER
12682 assert(map->res_count > 0);
12683 assert(map->ref_count >= map->res_count);
12684 map->res_count++;
12685 #endif
12686 map->ref_count++;
12687 lck_mtx_unlock(&map->s_lock);
12688 }
12689
12690 /*
12691 * vm_map_deallocate:
12692 *
12693 * Removes a reference from the specified map,
12694 * destroying it if no references remain.
12695 * The map should not be locked.
12696 */
12697 void
12698 vm_map_deallocate(
12699 register vm_map_t map)
12700 {
12701 unsigned int ref;
12702
12703 if (map == VM_MAP_NULL)
12704 return;
12705
12706 lck_mtx_lock(&map->s_lock);
12707 ref = --map->ref_count;
12708 if (ref > 0) {
12709 vm_map_res_deallocate(map);
12710 lck_mtx_unlock(&map->s_lock);
12711 return;
12712 }
12713 assert(map->ref_count == 0);
12714 lck_mtx_unlock(&map->s_lock);
12715
12716 #if TASK_SWAPPER
12717 /*
12718 * The map residence count isn't decremented here because
12719 * the vm_map_delete below will traverse the entire map,
12720 * deleting entries, and the residence counts on objects
12721 * and sharing maps will go away then.
12722 */
12723 #endif
12724
12725 vm_map_destroy(map, VM_MAP_NO_FLAGS);
12726 }
12727
12728
12729 void
12730 vm_map_disable_NX(vm_map_t map)
12731 {
12732 if (map == NULL)
12733 return;
12734 if (map->pmap == NULL)
12735 return;
12736
12737 pmap_disable_NX(map->pmap);
12738 }
12739
12740 void
12741 vm_map_disallow_data_exec(vm_map_t map)
12742 {
12743 if (map == NULL)
12744 return;
12745
12746 map->map_disallow_data_exec = TRUE;
12747 }
12748
12749 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
12750 * more descriptive.
12751 */
12752 void
12753 vm_map_set_32bit(vm_map_t map)
12754 {
12755 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
12756 }
12757
12758
12759 void
12760 vm_map_set_64bit(vm_map_t map)
12761 {
12762 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
12763 }
12764
12765 vm_map_offset_t
12766 vm_compute_max_offset(unsigned is64)
12767 {
12768 return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
12769 }
12770
12771 boolean_t
12772 vm_map_is_64bit(
12773 vm_map_t map)
12774 {
12775 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
12776 }
12777
12778 boolean_t
12779 vm_map_has_hard_pagezero(
12780 vm_map_t map,
12781 vm_map_offset_t pagezero_size)
12782 {
12783 /*
12784 * XXX FBDP
12785 * We should lock the VM map (for read) here but we can get away
12786 * with it for now because there can't really be any race condition:
12787 * the VM map's min_offset is changed only when the VM map is created
12788 * and when the zero page is established (when the binary gets loaded),
12789 * and this routine gets called only when the task terminates and the
12790 * VM map is being torn down, and when a new map is created via
12791 * load_machfile()/execve().
12792 */
12793 return (map->min_offset >= pagezero_size);
12794 }
12795
12796 void
12797 vm_map_set_4GB_pagezero(vm_map_t map)
12798 {
12799 #if defined(__i386__)
12800 pmap_set_4GB_pagezero(map->pmap);
12801 #else
12802 #pragma unused(map)
12803 #endif
12804
12805 }
12806
12807 void
12808 vm_map_clear_4GB_pagezero(vm_map_t map)
12809 {
12810 #if defined(__i386__)
12811 pmap_clear_4GB_pagezero(map->pmap);
12812 #else
12813 #pragma unused(map)
12814 #endif
12815 }
12816
12817 /*
12818 * Raise a VM map's maximum offset.
12819 */
12820 kern_return_t
12821 vm_map_raise_max_offset(
12822 vm_map_t map,
12823 vm_map_offset_t new_max_offset)
12824 {
12825 kern_return_t ret;
12826
12827 vm_map_lock(map);
12828 ret = KERN_INVALID_ADDRESS;
12829
12830 if (new_max_offset >= map->max_offset) {
12831 if (!vm_map_is_64bit(map)) {
12832 if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
12833 map->max_offset = new_max_offset;
12834 ret = KERN_SUCCESS;
12835 }
12836 } else {
12837 if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
12838 map->max_offset = new_max_offset;
12839 ret = KERN_SUCCESS;
12840 }
12841 }
12842 }
12843
12844 vm_map_unlock(map);
12845 return ret;
12846 }
12847
12848
12849 /*
12850 * Raise a VM map's minimum offset.
12851 * To strictly enforce "page zero" reservation.
12852 */
12853 kern_return_t
12854 vm_map_raise_min_offset(
12855 vm_map_t map,
12856 vm_map_offset_t new_min_offset)
12857 {
12858 vm_map_entry_t first_entry;
12859
12860 new_min_offset = vm_map_round_page(new_min_offset);
12861
12862 vm_map_lock(map);
12863
12864 if (new_min_offset < map->min_offset) {
12865 /*
12866 * Can't move min_offset backwards, as that would expose
12867 * a part of the address space that was previously, and for
12868 * possibly good reasons, inaccessible.
12869 */
12870 vm_map_unlock(map);
12871 return KERN_INVALID_ADDRESS;
12872 }
12873
12874 first_entry = vm_map_first_entry(map);
12875 if (first_entry != vm_map_to_entry(map) &&
12876 first_entry->vme_start < new_min_offset) {
12877 /*
12878 * Some memory was already allocated below the new
12879 * minimum offset. It's too late to change it now...
12880 */
12881 vm_map_unlock(map);
12882 return KERN_NO_SPACE;
12883 }
12884
12885 map->min_offset = new_min_offset;
12886
12887 vm_map_unlock(map);
12888
12889 return KERN_SUCCESS;
12890 }
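
/*
 * Illustrative user-level sketch (not part of this file): the visible effect
 * of a raised min_offset / hard page zero -- a fixed-address allocation below
 * the map's minimum offset is refused. On a typical 64-bit process the
 * __PAGEZERO segment reserves the low addresses, so this is expected to fail;
 * the exact error code returned is not guaranteed. Function name and the
 * chosen address/size are assumptions for the sketch only.
 */
#include <stdio.h>
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <mach/mach_error.h>
#include <mach/vm_statistics.h>

int
pagezero_demo(void)
{
	mach_vm_address_t	addr = 0x1000;	/* well below any hard page zero */
	kern_return_t		kr;

	kr = mach_vm_allocate(mach_task_self(), &addr,
			      (mach_vm_size_t)0x1000, VM_FLAGS_FIXED);
	printf("mach_vm_allocate at 0x%llx: %s\n",
	    (unsigned long long)addr, mach_error_string(kr));

	/* Expected: an error rather than KERN_SUCCESS. */
	return (kr == KERN_SUCCESS) ? -1 : 0;
}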
12891
12892 /*
12893 * Set the limit on the maximum amount of user wired memory allowed for this map.
12894 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
12895 * the kernel. The limits are checked on the Mach VM side, so we keep a copy here
12896 * to avoid reaching over to the BSD data structures.
12897 */
12898
12899 void
12900 vm_map_set_user_wire_limit(vm_map_t map,
12901 vm_size_t limit)
12902 {
12903 map->user_wire_limit = limit;
12904 }
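
/*
 * Illustrative user-level sketch (not part of this file): the limit cached
 * above mirrors the BSD RLIMIT_MEMLOCK value, so an mlock(2) request larger
 * than that limit is refused. If the limit is unlimited the demo is a no-op.
 * Function name and the extra size are assumptions for the sketch only.
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/resource.h>

int
wire_limit_demo(void)
{
	struct rlimit	rl;
	size_t		len;
	void		*buf;

	if (getrlimit(RLIMIT_MEMLOCK, &rl) != 0)
		return -1;
	if (rl.rlim_cur == RLIM_INFINITY) {
		printf("RLIMIT_MEMLOCK is unlimited; nothing to demonstrate\n");
		return 0;
	}

	/* Ask to wire more than the per-process limit allows. */
	len = (size_t)rl.rlim_cur + (1024 * 1024);
	buf = malloc(len);
	if (buf == NULL)
		return -1;

	if (mlock(buf, len) != 0)
		perror("mlock (expected: over RLIMIT_MEMLOCK)");
	else
		munlock(buf, len);

	free(buf);
	return 0;
}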
12905
12906
12907 void vm_map_switch_protect(vm_map_t map,
12908 boolean_t val)
12909 {
12910 vm_map_lock(map);
12911 map->switch_protect=val;
12912 vm_map_unlock(map);
12913 }
12914
12915 /* Add (generate) code signature for memory range */
12916 #if CONFIG_DYNAMIC_CODE_SIGNING
12917 kern_return_t vm_map_sign(vm_map_t map,
12918 vm_map_offset_t start,
12919 vm_map_offset_t end)
12920 {
12921 vm_map_entry_t entry;
12922 vm_page_t m;
12923 vm_object_t object;
12924
12925 /*
12926 * Vet all the input parameters and current type and state of the
12927 * underlying object. Return with an error if anything is amiss.
12928 */
12929 if (map == VM_MAP_NULL)
12930 return(KERN_INVALID_ARGUMENT);
12931
12932 vm_map_lock_read(map);
12933
12934 if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
12935 /*
12936 * Must pass a valid non-submap address.
12937 */
12938 vm_map_unlock_read(map);
12939 return(KERN_INVALID_ADDRESS);
12940 }
12941
12942 if((entry->vme_start > start) || (entry->vme_end < end)) {
12943 /*
12944 * Map entry doesn't cover the requested range. Not handling
12945 * this situation currently.
12946 */
12947 vm_map_unlock_read(map);
12948 return(KERN_INVALID_ARGUMENT);
12949 }
12950
12951 object = entry->object.vm_object;
12952 if (object == VM_OBJECT_NULL) {
12953 /*
12954 * Object must already be present or we can't sign.
12955 */
12956 vm_map_unlock_read(map);
12957 return KERN_INVALID_ARGUMENT;
12958 }
12959
12960 vm_object_lock(object);
12961 vm_map_unlock_read(map);
12962
12963 while(start < end) {
12964 uint32_t refmod;
12965
12966 m = vm_page_lookup(object, start - entry->vme_start + entry->offset );
12967 if (m==VM_PAGE_NULL) {
12968 /* should we try to fault in a page here? We can probably
12969 * demand that it exists and is locked for this request. */
12970 vm_object_unlock(object);
12971 return KERN_FAILURE;
12972 }
12973 /* deal with special page status */
12974 if (m->busy ||
12975 (m->unusual && (m->error || m->restart || m->private || m->absent))) {
12976 vm_object_unlock(object);
12977 return KERN_FAILURE;
12978 }
12979
12980 /* Page is OK... now "validate" it */
12981 /* This is the place where we'll call out to create a code
12982 * directory, later */
12983 m->cs_validated = TRUE;
12984
12985 /* The page is now "clean" for codesigning purposes. That means
12986 * we no longer consider it modified (wpmapped). But
12987 * we'll disconnect the page so we note any future modification
12988 * attempts. */
12989 m->wpmapped = FALSE;
12990 refmod = pmap_disconnect(m->phys_page);
12991
12992 /* Pull the dirty status from the pmap, since we cleared the
12993 * wpmapped bit */
12994 if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
12995 SET_PAGE_DIRTY(m, FALSE);
12996 }
12997
12998 /* On to the next page */
12999 start += PAGE_SIZE;
13000 }
13001 vm_object_unlock(object);
13002
13003 return KERN_SUCCESS;
13004 }
13005 #endif
13006
13007 #if CONFIG_FREEZE
13008
13009 kern_return_t vm_map_freeze_walk(
13010 vm_map_t map,
13011 unsigned int *purgeable_count,
13012 unsigned int *wired_count,
13013 unsigned int *clean_count,
13014 unsigned int *dirty_count,
13015 unsigned int dirty_budget,
13016 boolean_t *has_shared)
13017 {
13018 vm_map_entry_t entry;
13019
13020 vm_map_lock_read(map);
13021
13022 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
13023 *has_shared = FALSE;
13024
13025 for (entry = vm_map_first_entry(map);
13026 entry != vm_map_to_entry(map);
13027 entry = entry->vme_next) {
13028 unsigned int purgeable, clean, dirty, wired;
13029 boolean_t shared;
13030
13031 if ((entry->object.vm_object == 0) ||
13032 (entry->is_sub_map) ||
13033 (entry->object.vm_object->phys_contiguous)) {
13034 continue;
13035 }
13036
13037 default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared, entry->object.vm_object, NULL);
13038
13039 *purgeable_count += purgeable;
13040 *wired_count += wired;
13041 *clean_count += clean;
13042 *dirty_count += dirty;
13043
13044 if (shared) {
13045 *has_shared = TRUE;
13046 }
13047
13048 /* Adjust pageout budget and finish up if reached */
13049 if (dirty_budget) {
13050 dirty_budget -= dirty;
13051 if (dirty_budget == 0) {
13052 break;
13053 }
13054 }
13055 }
13056
13057 vm_map_unlock_read(map);
13058
13059 return KERN_SUCCESS;
13060 }
13061
13062 kern_return_t vm_map_freeze(
13063 vm_map_t map,
13064 unsigned int *purgeable_count,
13065 unsigned int *wired_count,
13066 unsigned int *clean_count,
13067 unsigned int *dirty_count,
13068 unsigned int dirty_budget,
13069 boolean_t *has_shared)
13070 {
13071 vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
13072 kern_return_t kr = KERN_SUCCESS;
13073
13074 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
13075 *has_shared = FALSE;
13076
13077 /*
13078 * We need the exclusive lock here so that we can
13079 * block any page faults or lookups while we are
13080 * in the middle of freezing this vm map.
13081 */
13082 vm_map_lock(map);
13083
13084 if (map->default_freezer_handle == NULL) {
13085 map->default_freezer_handle = default_freezer_handle_allocate();
13086 }
13087
13088 if ((kr = default_freezer_handle_init(map->default_freezer_handle)) != KERN_SUCCESS) {
13089 /*
13090 * This can happen if the default_freezer_handle passed in is NULL,
13091 * or if a table has already been allocated and associated
13092 * with this handle, i.e. the map is already frozen.
13093 */
13094 goto done;
13095 }
13096
13097 for (entry2 = vm_map_first_entry(map);
13098 entry2 != vm_map_to_entry(map);
13099 entry2 = entry2->vme_next) {
13100
13101 vm_object_t src_object = entry2->object.vm_object;
13102
13103 /* If eligible, scan the entry, moving eligible pages over to our parent object */
13104 if (entry2->object.vm_object && !entry2->is_sub_map && !entry2->object.vm_object->phys_contiguous) {
13105 unsigned int purgeable, clean, dirty, wired;
13106 boolean_t shared;
13107
13108 default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared,
13109 src_object, map->default_freezer_handle);
13110
13111 *purgeable_count += purgeable;
13112 *wired_count += wired;
13113 *clean_count += clean;
13114 *dirty_count += dirty;
13115
13116 /* Adjust pageout budget and finish up if reached */
13117 if (dirty_budget) {
13118 dirty_budget -= dirty;
13119 if (dirty_budget == 0) {
13120 break;
13121 }
13122 }
13123
13124 if (shared) {
13125 *has_shared = TRUE;
13126 }
13127 }
13128 }
13129
13130 /* Finally, throw out the pages to swap */
13131 default_freezer_pageout(map->default_freezer_handle);
13132
13133 done:
13134 vm_map_unlock(map);
13135
13136 return kr;
13137 }
13138
13139 kern_return_t
13140 vm_map_thaw(
13141 vm_map_t map)
13142 {
13143 kern_return_t kr = KERN_SUCCESS;
13144
13145 vm_map_lock(map);
13146
13147 if (map->default_freezer_handle == NULL) {
13148 /*
13149 * This map is not in a frozen state.
13150 */
13151 kr = KERN_FAILURE;
13152 goto out;
13153 }
13154
13155 default_freezer_unpack(map->default_freezer_handle);
13156 out:
13157 vm_map_unlock(map);
13158
13159 return kr;
13160 }
13161 #endif
13162
13163 #if !CONFIG_EMBEDDED
13164 /*
13165 * vm_map_entry_should_cow_for_true_share:
13166 *
13167 * Determines if the map entry should be clipped and setup for copy-on-write
13168 * to avoid applying "true_share" to a large VM object when only a subset is
13169 * targeted.
13170 *
13171 * For now, we target only the map entries created for the Objective C
13172 * Garbage Collector, which initially have the following properties:
13173 * - alias == VM_MEMORY_MALLOC
13174 * - wired_count == 0
13175 * - !needs_copy
13176 * and a VM object with:
13177 * - internal
13178 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
13179 * - !true_share
13180 * - vo_size == ANON_CHUNK_SIZE
13181 */
13182 boolean_t
13183 vm_map_entry_should_cow_for_true_share(
13184 vm_map_entry_t entry)
13185 {
13186 vm_object_t object;
13187
13188 if (entry->is_sub_map) {
13189 /* entry does not point at a VM object */
13190 return FALSE;
13191 }
13192
13193 if (entry->needs_copy) {
13194 /* already set for copy_on_write: done! */
13195 return FALSE;
13196 }
13197
13198 if (entry->alias != VM_MEMORY_MALLOC) {
13199 /* not tagged as an Objective-C Garbage Collector entry */
13200 return FALSE;
13201 }
13202
13203 if (entry->wired_count) {
13204 /* wired: can't change the map entry... */
13205 return FALSE;
13206 }
13207
13208 object = entry->object.vm_object;
13209
13210 if (object == VM_OBJECT_NULL) {
13211 /* no object yet... */
13212 return FALSE;
13213 }
13214
13215 if (!object->internal) {
13216 /* not an internal object */
13217 return FALSE;
13218 }
13219
13220 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
13221 /* not the default copy strategy */
13222 return FALSE;
13223 }
13224
13225 if (object->true_share) {
13226 /* already true_share: too late to avoid it */
13227 return FALSE;
13228 }
13229
13230 if (object->vo_size != ANON_CHUNK_SIZE) {
13231 /* not an object created for the ObjC Garbage Collector */
13232 return FALSE;
13233 }
13234
13235 /*
13236 * All the criteria match: we have a large object being targeted for "true_share".
13237 * To limit the adverse side-effects linked with "true_share", tell the caller to
13238 * try to avoid setting up the entire object for "true_share" by clipping the
13239 * targeted range and setting it up for copy-on-write.
13240 */
13241 return TRUE;
13242 }
13243 #endif /* !CONFIG_EMBEDDED */