osfmk/vm/vm_map.c

   1 /*
   2  * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * @OSF_COPYRIGHT@
  30  */
  31 /*
  32  * Mach Operating System
  33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
  34  * All Rights Reserved.
  35  *
  36  * Permission to use, copy, modify and distribute this software and its
  37  * documentation is hereby granted, provided that both the copyright
  38  * notice and this permission notice appear in all copies of the
  39  * software, derivative works or modified versions, and any portions
  40  * thereof, and that both notices appear in supporting documentation.
  41  *
  42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
  44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  45  *
  46  * Carnegie Mellon requests users of this software to return to
  47  *
  48  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  49  *  School of Computer Science
  50  *  Carnegie Mellon University
  51  *  Pittsburgh PA 15213-3890
  52  *
  53  * any improvements or extensions that they make and grant Carnegie Mellon
  54  * the rights to redistribute these changes.
  55  */
  56 /*
  57  */
  58 /*
  59  *      File:   vm/vm_map.c
  60  *      Author: Avadis Tevanian, Jr., Michael Wayne Young
  61  *      Date:   1985
  62  *
  63  *      Virtual memory mapping module.
  64  */
  65
  66 #include <task_swapper.h>
  67 #include <mach_assert.h>
  68
  69 #include <vm/vm_options.h>
  70
  71 #include <libkern/OSAtomic.h>
  72
  73 #include <mach/kern_return.h>
  74 #include <mach/port.h>
  75 #include <mach/vm_attributes.h>
  76 #include <mach/vm_param.h>
  77 #include <mach/vm_behavior.h>
  78 #include <mach/vm_statistics.h>
  79 #include <mach/memory_object.h>
  80 #include <mach/mach_vm.h>
  81 #include <machine/cpu_capabilities.h>
  82 #include <mach/sdt.h>
  83
  84 #include <kern/assert.h>
  85 #include <kern/counters.h>
  86 #include <kern/kalloc.h>
  87 #include <kern/zalloc.h>
  88
  89 #include <vm/cpm.h>
  90 #include <vm/vm_compressor_pager.h>
  91 #include <vm/vm_init.h>
  92 #include <vm/vm_fault.h>
  93 #include <vm/vm_map.h>
  94 #include <vm/vm_object.h>
  95 #include <vm/vm_page.h>
  96 #include <vm/vm_pageout.h>
  97 #include <vm/vm_kern.h>
  98 #include <ipc/ipc_port.h>
  99 #include <kern/sched_prim.h>
 100 #include <kern/misc_protos.h>
 101 #include <kern/xpr.h>
 102
 103 #include <mach/vm_map_server.h>
 104 #include <mach/mach_host_server.h>
 105 #include <vm/vm_protos.h>
 106 #include <vm/vm_purgeable_internal.h>
 107
 108 #include <vm/vm_protos.h>
 109 #include <vm/vm_shared_region.h>
 110 #include <vm/vm_map_store.h>
 111
 112
 113 extern u_int32_t random(void);  /* from <libkern/libkern.h> */
 114 /* Internal prototypes
 115  */
 116
 117 static void vm_map_simplify_range(
 118         vm_map_t        map,
 119         vm_map_offset_t start,
 120         vm_map_offset_t end);   /* forward */
 121
 122 static boolean_t        vm_map_range_check(
 123         vm_map_t        map,
 124         vm_map_offset_t start,
 125         vm_map_offset_t end,
 126         vm_map_entry_t  *entry);
 127
 128 static vm_map_entry_t   _vm_map_entry_create(
 129         struct vm_map_header    *map_header, boolean_t map_locked);
 130
 131 static void             _vm_map_entry_dispose(
 132         struct vm_map_header    *map_header,
 133         vm_map_entry_t          entry);
 134
 135 static void             vm_map_pmap_enter(
 136         vm_map_t                map,
 137         vm_map_offset_t         addr,
 138         vm_map_offset_t         end_addr,
 139         vm_object_t             object,
 140         vm_object_offset_t      offset,
 141         vm_prot_t               protection);
 142
 143 static void             _vm_map_clip_end(
 144         struct vm_map_header    *map_header,
 145         vm_map_entry_t          entry,
 146         vm_map_offset_t         end);
 147
 148 static void             _vm_map_clip_start(
 149         struct vm_map_header    *map_header,
 150         vm_map_entry_t          entry,
 151         vm_map_offset_t         start);
 152
 153 static void             vm_map_entry_delete(
 154         vm_map_t        map,
 155         vm_map_entry_t  entry);
 156
 157 static kern_return_t    vm_map_delete(
 158         vm_map_t        map,
 159         vm_map_offset_t start,
 160         vm_map_offset_t end,
 161         int             flags,
 162         vm_map_t        zap_map);
 163
 164 static kern_return_t    vm_map_copy_overwrite_unaligned(
 165         vm_map_t        dst_map,
 166         vm_map_entry_t  entry,
 167         vm_map_copy_t   copy,
 168         vm_map_address_t start,
 169         boolean_t       discard_on_success);
 170
 171 static kern_return_t    vm_map_copy_overwrite_aligned(
 172         vm_map_t        dst_map,
 173         vm_map_entry_t  tmp_entry,
 174         vm_map_copy_t   copy,
 175         vm_map_offset_t start,
 176         pmap_t          pmap);
 177
 178 static kern_return_t    vm_map_copyin_kernel_buffer(
 179         vm_map_t        src_map,
 180         vm_map_address_t src_addr,
 181         vm_map_size_t   len,
 182         boolean_t       src_destroy,
 183         vm_map_copy_t   *copy_result);  /* OUT */
 184
 185 static kern_return_t    vm_map_copyout_kernel_buffer(
 186         vm_map_t        map,
 187         vm_map_address_t *addr, /* IN/OUT */
 188         vm_map_copy_t   copy,
 189         boolean_t       overwrite,
 190         boolean_t       consume_on_success);
 191
 192 static void             vm_map_fork_share(
 193         vm_map_t        old_map,
 194         vm_map_entry_t  old_entry,
 195         vm_map_t        new_map);
 196
 197 static boolean_t        vm_map_fork_copy(
 198         vm_map_t        old_map,
 199         vm_map_entry_t  *old_entry_p,
 200         vm_map_t        new_map);
 201
 202 void            vm_map_region_top_walk(
 203         vm_map_entry_t             entry,
 204         vm_region_top_info_t       top);
 205
 206 void            vm_map_region_walk(
 207         vm_map_t                   map,
 208         vm_map_offset_t            va,
 209         vm_map_entry_t             entry,
 210         vm_object_offset_t         offset,
 211         vm_object_size_t           range,
 212         vm_region_extended_info_t  extended,
 213         boolean_t                  look_for_pages,
 214         mach_msg_type_number_t count);
 215
 216 static kern_return_t    vm_map_wire_nested(
 217         vm_map_t                   map,
 218         vm_map_offset_t            start,
 219         vm_map_offset_t            end,
 220         vm_prot_t                  caller_prot,
 221         boolean_t                  user_wire,
 222         pmap_t                     map_pmap,
 223         vm_map_offset_t            pmap_addr,
 224         ppnum_t                    *physpage_p);
 225
 226 static kern_return_t    vm_map_unwire_nested(
 227         vm_map_t                   map,
 228         vm_map_offset_t            start,
 229         vm_map_offset_t            end,
 230         boolean_t                  user_wire,
 231         pmap_t                     map_pmap,
 232         vm_map_offset_t            pmap_addr);
 233
 234 static kern_return_t    vm_map_overwrite_submap_recurse(
 235         vm_map_t                   dst_map,
 236         vm_map_offset_t            dst_addr,
 237         vm_map_size_t              dst_size);
 238
 239 static kern_return_t    vm_map_copy_overwrite_nested(
 240         vm_map_t                   dst_map,
 241         vm_map_offset_t            dst_addr,
 242         vm_map_copy_t              copy,
 243         boolean_t                  interruptible,
 244         pmap_t                     pmap,
 245         boolean_t                  discard_on_success);
 246
 247 static kern_return_t    vm_map_remap_extract(
 248         vm_map_t                map,
 249         vm_map_offset_t         addr,
 250         vm_map_size_t           size,
 251         boolean_t               copy,
 252         struct vm_map_header    *map_header,
 253         vm_prot_t               *cur_protection,
 254         vm_prot_t               *max_protection,
 255         vm_inherit_t            inheritance,
 256         boolean_t               pageable);
 257
 258 static kern_return_t    vm_map_remap_range_allocate(
 259         vm_map_t                map,
 260         vm_map_address_t        *address,
 261         vm_map_size_t           size,
 262         vm_map_offset_t         mask,
 263         int                     flags,
 264         vm_map_entry_t          *map_entry);
 265
 266 static void             vm_map_region_look_for_page(
 267         vm_map_t                   map,
 268         vm_map_offset_t            va,
 269         vm_object_t                object,
 270         vm_object_offset_t         offset,
 271         int                        max_refcnt,
 272         int                        depth,
 273         vm_region_extended_info_t  extended,
 274         mach_msg_type_number_t count);
 275
 276 static int              vm_map_region_count_obj_refs(
 277         vm_map_entry_t             entry,
 278         vm_object_t                object);
 279
 280
 281 static kern_return_t    vm_map_willneed(
 282         vm_map_t        map,
 283         vm_map_offset_t start,
 284         vm_map_offset_t end);
 285
 286 static kern_return_t    vm_map_reuse_pages(
 287         vm_map_t        map,
 288         vm_map_offset_t start,
 289         vm_map_offset_t end);
 290
 291 static kern_return_t    vm_map_reusable_pages(
 292         vm_map_t        map,
 293         vm_map_offset_t start,
 294         vm_map_offset_t end);
 295
 296 static kern_return_t    vm_map_can_reuse(
 297         vm_map_t        map,
 298         vm_map_offset_t start,
 299         vm_map_offset_t end);
 300
 301 #if MACH_ASSERT
 302 static kern_return_t    vm_map_pageout(
 303         vm_map_t        map,
 304         vm_map_offset_t start,
 305         vm_map_offset_t end);
 306 #endif /* MACH_ASSERT */
 307
 308 /*
 309  * Macros to copy a vm_map_entry. We must be careful to correctly
 310  * manage the wired page count. vm_map_entry_copy() creates a new
 311  * map entry to the same memory - the wired count in the new entry
 312  * must be set to zero. vm_map_entry_copy_full() creates a new
 313  * entry that is identical to the old entry.  This preserves the
 314  * wire count; it's used for map splitting and zone changing in
 315  * vm_map_copyout.
 316  */
 317
 318 #define vm_map_entry_copy(NEW,OLD)      \
 319 MACRO_BEGIN                             \
 320 boolean_t _vmec_reserved = (NEW)->from_reserved_zone;   \
 321         *(NEW) = *(OLD);                \
 322         (NEW)->is_shared = FALSE;       \
 323         (NEW)->needs_wakeup = FALSE;    \
 324         (NEW)->in_transition = FALSE;   \
 325         (NEW)->wired_count = 0;         \
 326         (NEW)->user_wired_count = 0;    \
 327         (NEW)->permanent = FALSE;       \
 328         (NEW)->used_for_jit = FALSE;    \
 329         (NEW)->from_reserved_zone = _vmec_reserved;     \
 330         (NEW)->iokit_acct = FALSE;      \
 331         (NEW)->vme_resilient_codesign = FALSE; \
 332         (NEW)->vme_resilient_media = FALSE;     \
 333 MACRO_END
 334
 335 #define vm_map_entry_copy_full(NEW,OLD)                 \
 336 MACRO_BEGIN                                             \
 337 boolean_t _vmecf_reserved = (NEW)->from_reserved_zone;  \
 338 (*(NEW) = *(OLD));                                      \
 339 (NEW)->from_reserved_zone = _vmecf_reserved;                    \
 340 MACRO_END
 341
 342 /*
 343  *      Decide if we want to allow processes to execute from their data or stack areas.
 344  *      override_nx() returns true if we do.  Data/stack execution can be enabled independently
 345  *      for 32 and 64 bit processes.  Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
 346  *      or allow_stack_exec to enable data execution for that type of data area for that particular
 347  *      ABI (or both by or'ing the flags together).  These are initialized in the architecture
 348  *      specific pmap files since the default behavior varies according to architecture.  The
 349  *      main reason it varies is because of the need to provide binary compatibility with old
 350  *      applications that were written before these restrictions came into being.  In the old
 351  *      days, an app could execute anything it could read, but this has slowly been tightened
 352  *      up over time.  The default behavior is:
 353  *
 354  *      32-bit PPC apps         may execute from both stack and data areas
 355  *      32-bit Intel apps       may execute from data areas but not stack
 356  *      64-bit PPC/Intel apps   may not execute from either data or stack
 357  *
 358  *      An application on any architecture may override these defaults by explicitly
 359  *      adding PROT_EXEC permission to the page in question with the mprotect(2)
 360  *      system call.  This code here just determines what happens when an app tries to
 361  *      execute from a page that lacks execute permission.
 362  *
 363  *      Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
 364  *      default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
 365  *      a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
 366  *      execution from data areas for a particular binary even if the arch normally permits it. As
 367  *      a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
 368  *      to support some complicated use cases, notably browsers with out-of-process plugins that
 369  *      are not all NX-safe.
 370  */
 371
 372 extern int allow_data_exec, allow_stack_exec;
 373
 374 int
 375 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
 376 {
 377         int current_abi;
 378
 379         if (map->pmap == kernel_pmap) return FALSE;
 380
 381         /*
 382          * Determine if the app is running in 32 or 64 bit mode.
 383          */
 384
 385         if (vm_map_is_64bit(map))
 386                 current_abi = VM_ABI_64;
 387         else
 388                 current_abi = VM_ABI_32;
 389
 390         /*
 391          * Determine if we should allow the execution based on whether it's a
 392          * stack or data area and the current architecture.
 393          */
 394
 395         if (user_tag == VM_MEMORY_STACK)
 396                 return allow_stack_exec & current_abi;
 397
 398         return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
 399 }
 400
 401
 402 /*
 403  *      Virtual memory maps provide for the mapping, protection,
 404  *      and sharing of virtual memory objects.  In addition,
 405  *      this module provides for an efficient virtual copy of
 406  *      memory from one map to another.
 407  *
 408  *      Synchronization is required prior to most operations.
 409  *
 410  *      Maps consist of an ordered doubly-linked list of simple
 411  *      entries; a single hint is used to speed up lookups.
 412  *
 413  *      Sharing maps have been deleted from this version of Mach.
 414  *      All shared objects are now mapped directly into the respective
 415  *      maps.  This requires a change in the copy on write strategy;
 416  *      the asymmetric (delayed) strategy is used for shared temporary
 417  *      objects instead of the symmetric (shadow) strategy.  All maps
 418  *      are now "top level" maps (either task map, kernel map or submap
 419  *      of the kernel map).
 420  *
 421  *      Since portions of maps are specified by start/end addresses,
 422  *      which may not align with existing map entries, all
 423  *      routines merely "clip" entries to these start/end values.
 424  *      [That is, an entry is split into two, bordering at a
 425  *      start or end value.]  Note that these clippings may not
 426  *      always be necessary (as the two resulting entries are then
 427  *      not changed); however, the clipping is done for convenience.
 428  *      No attempt is currently made to "glue back together" two
 429  *      abutting entries.
 430  *
 431  *      The symmetric (shadow) copy strategy implements virtual copy
 432  *      by copying VM object references from one map to
 433  *      another, and then marking both regions as copy-on-write.
 434  *      It is important to note that only one writeable reference
 435  *      to a VM object region exists in any map when this strategy
 436  *      is used -- this means that shadow object creation can be
 437  *      delayed until a write operation occurs.  The asymmetric (delayed)
 438  *      strategy allows multiple maps to have writeable references to
 439  *      the same region of a vm object, and hence cannot delay creating
 440  *      its copy objects.  See vm_object_copy_quickly() in vm_object.c.
 441  *      Copying of permanent objects is completely different; see
 442  *      vm_object_copy_strategically() in vm_object.c.
 443  */
 444
 445 static zone_t   vm_map_zone;            /* zone for vm_map structures */
 446 static zone_t   vm_map_entry_zone;      /* zone for vm_map_entry structures */
 447 static zone_t   vm_map_entry_reserved_zone;     /* zone with reserve for non-blocking
 448                                          * allocations */
 449 static zone_t   vm_map_copy_zone;       /* zone for vm_map_copy structures */
 450 zone_t          vm_map_holes_zone;      /* zone for vm map holes (vm_map_links) structures */
 451
 452
 453 /*
 454  *      Placeholder object for submap operations.  This object is dropped
 455  *      into the range by a call to vm_map_find, and removed when
 456  *      vm_map_submap creates the submap.
 457  */
 458
 459 vm_object_t     vm_submap_object;
 460
 461 static void             *map_data;
 462 static vm_size_t        map_data_size;
 463 static void             *kentry_data;
 464 static vm_size_t        kentry_data_size;
 465 static void             *map_holes_data;
 466 static vm_size_t        map_holes_data_size;
 467
 468 #define         NO_COALESCE_LIMIT  ((1024 * 128) - 1)
 469
 470 /* Skip acquiring locks if we're in the midst of a kernel core dump */
 471 unsigned int not_in_kdp = 1;
 472
 473 unsigned int vm_map_set_cache_attr_count = 0;
 474
 475 kern_return_t
 476 vm_map_set_cache_attr(
 477         vm_map_t        map,
 478         vm_map_offset_t va)
 479 {
 480         vm_map_entry_t  map_entry;
 481         vm_object_t     object;
 482         kern_return_t   kr = KERN_SUCCESS;
 483
 484         vm_map_lock_read(map);
 485
 486         if (!vm_map_lookup_entry(map, va, &map_entry) ||
 487             map_entry->is_sub_map) {
 488                 /*
 489                  * that memory is not properly mapped
 490                  */
 491                 kr = KERN_INVALID_ARGUMENT;
 492                 goto done;
 493         }
 494         object = VME_OBJECT(map_entry);
 495
 496         if (object == VM_OBJECT_NULL) {
 497                 /*
 498                  * there should be a VM object here at this point
 499                  */
 500                 kr = KERN_INVALID_ARGUMENT;
 501                 goto done;
 502         }
 503         vm_object_lock(object);
 504         object->set_cache_attr = TRUE;
 505         vm_object_unlock(object);
 506
 507         vm_map_set_cache_attr_count++;
 508 done:
 509         vm_map_unlock_read(map);
 510
 511         return kr;
 512 }
 513
 514
 515 #if CONFIG_CODE_DECRYPTION
 516 /*
 517  * vm_map_apple_protected:
 518  * This remaps the requested part of the object with an object backed by
 519  * the decrypting pager.
 520  * crypt_info contains entry points and session data for the crypt module.
 521  * The crypt_info block will be copied by vm_map_apple_protected. The data structures
 522  * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
 523  */
 524 kern_return_t
 525 vm_map_apple_protected(
 526         vm_map_t                map,
 527         vm_map_offset_t         start,
 528         vm_map_offset_t         end,
 529         vm_object_offset_t      crypto_backing_offset,
 530         struct pager_crypt_info *crypt_info)
 531 {
 532         boolean_t       map_locked;
 533         kern_return_t   kr;
 534         vm_map_entry_t  map_entry;
 535         struct vm_map_entry tmp_entry;
 536         memory_object_t unprotected_mem_obj;
 537         vm_object_t     protected_object;
 538         vm_map_offset_t map_addr;
 539         vm_map_offset_t start_aligned, end_aligned;
 540         vm_object_offset_t      crypto_start, crypto_end;
 541         int             vm_flags;
 542
 543         map_locked = FALSE;
 544         unprotected_mem_obj = MEMORY_OBJECT_NULL;
 545
 546         start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
 547         end_aligned = vm_map_round_page(end, PAGE_MASK_64);
 548         start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
 549         end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));
 550
 551         assert(start_aligned == start);
 552         assert(end_aligned == end);
 553
 554         map_addr = start_aligned;
 555         for (map_addr = start_aligned;
 556              map_addr < end;
 557              map_addr = tmp_entry.vme_end) {
 558                 vm_map_lock(map);
 559                 map_locked = TRUE;
 560
 561                 /* lookup the protected VM object */
 562                 if (!vm_map_lookup_entry(map,
 563                                          map_addr,
 564                                          &map_entry) ||
 565                     map_entry->is_sub_map ||
 566                     VME_OBJECT(map_entry) == VM_OBJECT_NULL ||
 567                     !(map_entry->protection & VM_PROT_EXECUTE)) {
 568                         /* that memory is not properly mapped */
 569                         kr = KERN_INVALID_ARGUMENT;
 570                         goto done;
 571                 }
 572
 573                 /* get the protected object to be decrypted */
 574                 protected_object = VME_OBJECT(map_entry);
 575                 if (protected_object == VM_OBJECT_NULL) {
 576                         /* there should be a VM object here at this point */
 577                         kr = KERN_INVALID_ARGUMENT;
 578                         goto done;
 579                 }
 580                 /* ensure protected object stays alive while map is unlocked */
 581                 vm_object_reference(protected_object);
 582
 583                 /* limit the map entry to the area we want to cover */
 584                 vm_map_clip_start(map, map_entry, start_aligned);
 585                 vm_map_clip_end(map, map_entry, end_aligned);
 586
 587                 tmp_entry = *map_entry;
 588                 map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
 589                 vm_map_unlock(map);
 590                 map_locked = FALSE;
 591
 592                 /*
 593                  * This map entry might be only partially encrypted
 594                  * (if not fully "page-aligned").
 595                  */
 596                 crypto_start = 0;
 597                 crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
 598                 if (tmp_entry.vme_start < start) {
 599                         if (tmp_entry.vme_start != start_aligned) {
 600                                 kr = KERN_INVALID_ADDRESS;
 601                         }
 602                         crypto_start += (start - tmp_entry.vme_start);
 603                 }
 604                 if (tmp_entry.vme_end > end) {
 605                         if (tmp_entry.vme_end != end_aligned) {
 606                                 kr = KERN_INVALID_ADDRESS;
 607                         }
 608                         crypto_end -= (tmp_entry.vme_end - end);
 609                 }
 610
 611                 /*
 612                  * This "extra backing offset" is needed to get the decryption
 613                  * routine to use the right key.  It adjusts for the possibly
 614                  * relative offset of an interposed "4K" pager...
 615                  */
 616                 if (crypto_backing_offset == (vm_object_offset_t) -1) {
 617                         crypto_backing_offset = VME_OFFSET(&tmp_entry);
 618                 }
 619
 620                 /*
 621                  * Lookup (and create if necessary) the protected memory object
 622                  * matching that VM object.
 623                  * If successful, this also grabs a reference on the memory object,
 624                  * to guarantee that it doesn't go away before we get a chance to map
 625                  * it.
 626                  */
 627                 unprotected_mem_obj = apple_protect_pager_setup(
 628                         protected_object,
 629                         VME_OFFSET(&tmp_entry),
 630                         crypto_backing_offset,
 631                         crypt_info,
 632                         crypto_start,
 633                         crypto_end);
 634
 635                 /* release extra ref on protected object */
 636                 vm_object_deallocate(protected_object);
 637
 638                 if (unprotected_mem_obj == NULL) {
 639                         kr = KERN_FAILURE;
 640                         goto done;
 641                 }
 642
 643                 vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
 644
 645                 /* map this memory object in place of the current one */
 646                 map_addr = tmp_entry.vme_start;
 647                 kr = vm_map_enter_mem_object(map,
 648                                              &map_addr,
 649                                              (tmp_entry.vme_end -
 650                                               tmp_entry.vme_start),
 651                                              (mach_vm_offset_t) 0,
 652                                              vm_flags,
 653                                              (ipc_port_t) unprotected_mem_obj,
 654                                              0,
 655                                              TRUE,
 656                                              tmp_entry.protection,
 657                                              tmp_entry.max_protection,
 658                                              tmp_entry.inheritance);
 659                 assert(kr == KERN_SUCCESS);
 660                 assert(map_addr == tmp_entry.vme_start);
 661
 662 #if VM_MAP_DEBUG_APPLE_PROTECT
 663                 printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p: "
 664                        "backing:[object:%p,offset:0x%llx,"
 665                        "crypto_backing_offset:0x%llx,"
 666                        "crypto_start:0x%llx,crypto_end:0x%llx]\n",
 667                        map,
 668                        (uint64_t) map_addr,
 669                        (uint64_t) (map_addr + (tmp_entry.vme_end -
 670                                                tmp_entry.vme_start)),
 671                        unprotected_mem_obj,
 672                        protected_object,
 673                        VME_OFFSET(&tmp_entry),
 674                        crypto_backing_offset,
 675                        crypto_start,
 676                        crypto_end);
 677 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
 678
 679                 /*
 680                  * Release the reference obtained by
 681                  * apple_protect_pager_setup().
 682                  * The mapping (if it succeeded) is now holding a reference on
 683                  * the memory object.
 684                  */
 685                 memory_object_deallocate(unprotected_mem_obj);
 686                 unprotected_mem_obj = MEMORY_OBJECT_NULL;
 687
 688                 /* continue with next map entry */
 689                 crypto_backing_offset += (tmp_entry.vme_end -
 690                                           tmp_entry.vme_start);
 691                 crypto_backing_offset -= crypto_start;
 692         }
 693         kr = KERN_SUCCESS;
 694
 695 done:
 696         if (map_locked) {
 697                 vm_map_unlock(map);
 698         }
 699         return kr;
 700 }
 701 #endif  /* CONFIG_CODE_DECRYPTION */
 702
 703
 704 lck_grp_t               vm_map_lck_grp;
 705 lck_grp_attr_t  vm_map_lck_grp_attr;
 706 lck_attr_t              vm_map_lck_attr;
 707 lck_attr_t              vm_map_lck_rw_attr;
 708
 709
 710 /*
 711  *      vm_map_init:
 712  *
 713  *      Initialize the vm_map module.  Must be called before
 714  *      any other vm_map routines.
 715  *
 716  *      Map and entry structures are allocated from zones -- we must
 717  *      initialize those zones.
 718  *
 719  *      There are three zones of interest:
 720  *
 721  *      vm_map_zone:            used to allocate maps.
 722  *      vm_map_entry_zone:      used to allocate map entries.
 723  *      vm_map_entry_reserved_zone:     fallback zone for kernel map entries
 724  *
 725  *      The kernel allocates map entries from a special zone that is initially
 726  *      "crammed" with memory.  It would be difficult (perhaps impossible) for
 727  *      the kernel to allocate more memory to an entry zone when it became
 728  *      empty since the very act of allocating memory implies the creation
 729  *      of a new entry.
 730  */
 731 void
 732 vm_map_init(
 733         void)
 734 {
 735         vm_size_t entry_zone_alloc_size;
 736         const char *mez_name = "VM map entries";
 737
 738         vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
 739                             PAGE_SIZE, "maps");
 740         zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
 741 #if     defined(__LP64__)
 742         entry_zone_alloc_size = PAGE_SIZE * 5;
 743 #else
 744         entry_zone_alloc_size = PAGE_SIZE * 6;
 745 #endif
 746         vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
 747                                   1024*1024, entry_zone_alloc_size,
 748                                   mez_name);
 749         zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
 750         zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
 751         zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);
 752
 753         vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
 754                                    kentry_data_size * 64, kentry_data_size,
 755                                    "Reserved VM map entries");
 756         zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);
 757
 758         vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
 759                                  16*1024, PAGE_SIZE, "VM map copies");
 760         zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
 761
 762         vm_map_holes_zone = zinit((vm_map_size_t) sizeof(struct vm_map_links),
 763                                  16*1024, PAGE_SIZE, "VM map holes");
 764         zone_change(vm_map_holes_zone, Z_NOENCRYPT, TRUE);
 765
 766         /*
 767          *      Cram the map and kentry zones with initial data.
 768          *      Set reserved_zone non-collectible to aid zone_gc().
 769          */
 770         zone_change(vm_map_zone, Z_COLLECT, FALSE);
 771
 772         zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
 773         zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
 774         zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
 775         zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
 776         zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
 777         zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
 778         zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);
 779
 780         zone_change(vm_map_holes_zone, Z_COLLECT, TRUE);
 781         zone_change(vm_map_holes_zone, Z_EXPAND, TRUE);
 782         zone_change(vm_map_holes_zone, Z_FOREIGN, TRUE);
 783         zone_change(vm_map_holes_zone, Z_NOCALLOUT, TRUE);
 784         zone_change(vm_map_holes_zone, Z_CALLERACCT, TRUE);
 785         zone_change(vm_map_holes_zone, Z_GZALLOC_EXEMPT, TRUE);
 786
 787         /*
 788          * Add the stolen memory to zones, adjust zone size and stolen counts.
 789          */
 790         zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
 791         zcram(vm_map_entry_reserved_zone, (vm_offset_t)kentry_data, kentry_data_size);
 792         zcram(vm_map_holes_zone, (vm_offset_t)map_holes_data, map_holes_data_size);
 793         VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));
 794
 795         lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
 796         lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
 797         lck_attr_setdefault(&vm_map_lck_attr);
 798
 799         lck_attr_setdefault(&vm_map_lck_rw_attr);
 800         lck_attr_cleardebug(&vm_map_lck_rw_attr);
 801
 802 #if CONFIG_FREEZE
 803         default_freezer_init();
 804 #endif /* CONFIG_FREEZE */
 805 }
 806
 807 void
 808 vm_map_steal_memory(
 809         void)
 810 {
 811         uint32_t kentry_initial_pages;
 812
 813         map_data_size = round_page(10 * sizeof(struct _vm_map));
 814         map_data = pmap_steal_memory(map_data_size);
 815
 816         /*
 817          * kentry_initial_pages corresponds to the number of kernel map entries
 818          * required during bootstrap until the asynchronous replenishment
 819          * scheme is activated and/or entries are available from the general
 820          * map entry pool.
 821          */
 822 #if     defined(__LP64__)
 823         kentry_initial_pages = 10;
 824 #else
 825         kentry_initial_pages = 6;
 826 #endif
 827
 828 #if CONFIG_GZALLOC
 829         /* If using the guard allocator, reserve more memory for the kernel
 830          * reserved map entry pool.
 831         */
 832         if (gzalloc_enabled())
 833                 kentry_initial_pages *= 1024;
 834 #endif
 835
 836         kentry_data_size = kentry_initial_pages * PAGE_SIZE;
 837         kentry_data = pmap_steal_memory(kentry_data_size);
 838
 839         map_holes_data_size = kentry_data_size;
 840         map_holes_data = pmap_steal_memory(map_holes_data_size);
 841 }
 842
 843 void
 844 vm_kernel_reserved_entry_init(void) {
 845         zone_prio_refill_configure(vm_map_entry_reserved_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_entry));
 846         zone_prio_refill_configure(vm_map_holes_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_links));
 847 }
 848
 849 void
 850 vm_map_disable_hole_optimization(vm_map_t map)
 851 {
 852         vm_map_entry_t  head_entry, hole_entry, next_hole_entry;
 853
 854         if (map->holelistenabled) {
 855
 856                 head_entry = hole_entry = (vm_map_entry_t) map->holes_list;
 857
 858                 while (hole_entry != NULL) {
 859
 860                         next_hole_entry = hole_entry->vme_next;
 861
 862                         hole_entry->vme_next = NULL;
 863                         hole_entry->vme_prev = NULL;
 864                         zfree(vm_map_holes_zone, hole_entry);
 865
 866                         if (next_hole_entry == head_entry) {
 867                                 hole_entry = NULL;
 868                         } else {
 869                                 hole_entry = next_hole_entry;
 870                         }
 871                 }
 872
 873                 map->holes_list = NULL;
 874                 map->holelistenabled = FALSE;
 875
 876                 map->first_free = vm_map_first_entry(map);
 877                 SAVE_HINT_HOLE_WRITE(map, NULL);
 878         }
 879 }
 880
 881 boolean_t
 882 vm_kernel_map_is_kernel(vm_map_t map) {
 883         return (map->pmap == kernel_pmap);
 884 }
 885
 886 /*
 887  *      vm_map_create:
 888  *
 889  *      Creates and returns a new empty VM map with
 890  *      the given physical map structure, and having
 891  *      the given lower and upper address bounds.
 892  */
 893
 894 boolean_t vm_map_supports_hole_optimization = TRUE;
 895
 896 vm_map_t
 897 vm_map_create(
 898         pmap_t                  pmap,
 899         vm_map_offset_t min,
 900         vm_map_offset_t max,
 901         boolean_t               pageable)
 902 {
 903         static int              color_seed = 0;
 904         register vm_map_t       result;
 905         struct vm_map_links     *hole_entry = NULL;
 906
 907         result = (vm_map_t) zalloc(vm_map_zone);
 908         if (result == VM_MAP_NULL)
 909                 panic("vm_map_create");
 910
 911         vm_map_first_entry(result) = vm_map_to_entry(result);
 912         vm_map_last_entry(result)  = vm_map_to_entry(result);
 913         result->hdr.nentries = 0;
 914         result->hdr.entries_pageable = pageable;
 915
 916         vm_map_store_init( &(result->hdr) );
 917
 918         result->hdr.page_shift = PAGE_SHIFT;
 919
 920         result->size = 0;
 921         result->user_wire_limit = MACH_VM_MAX_ADDRESS;  /* default limit is unlimited */
 922         result->user_wire_size  = 0;
 923         result->ref_count = 1;
 924 #if     TASK_SWAPPER
 925         result->res_count = 1;
 926         result->sw_state = MAP_SW_IN;
 927 #endif  /* TASK_SWAPPER */
 928         result->pmap = pmap;
 929         result->min_offset = min;
 930         result->max_offset = max;
 931         result->wiring_required = FALSE;
 932         result->no_zero_fill = FALSE;
 933         result->mapped_in_other_pmaps = FALSE;
 934         result->wait_for_space = FALSE;
 935         result->switch_protect = FALSE;
 936         result->disable_vmentry_reuse = FALSE;
 937         result->map_disallow_data_exec = FALSE;
 938         result->highest_entry_end = 0;
 939         result->first_free = vm_map_to_entry(result);
 940         result->hint = vm_map_to_entry(result);
 941         result->color_rr = (color_seed++) & vm_color_mask;
 942         result->jit_entry_exists = FALSE;
 943
 944         if (vm_map_supports_hole_optimization && pmap != kernel_pmap) {
 945                 hole_entry = zalloc(vm_map_holes_zone);
 946
 947                 hole_entry->start = min;
 948                 hole_entry->end = (max > (vm_map_offset_t)MACH_VM_MAX_ADDRESS) ? max : (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
 949                 result->holes_list = result->hole_hint = hole_entry;
 950                 hole_entry->prev = hole_entry->next = (vm_map_entry_t) hole_entry;
 951                 result->holelistenabled = TRUE;
 952
 953         } else {
 954
 955                 result->holelistenabled = FALSE;
 956         }
 957
 958 #if CONFIG_FREEZE
 959         result->default_freezer_handle = NULL;
 960 #endif
 961         vm_map_lock_init(result);
 962         lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
 963
 964         return(result);
 965 }
 966
 967 /*
 968  *      vm_map_entry_create:    [ internal use only ]
 969  *
 970  *      Allocates a VM map entry for insertion in the
 971  *      given map (or map copy).  No fields are filled.
 972  */
 973 #define vm_map_entry_create(map, map_locked)    _vm_map_entry_create(&(map)->hdr, map_locked)
 974
 975 #define vm_map_copy_entry_create(copy, map_locked)                                      \
 976         _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
 977 unsigned reserved_zalloc_count, nonreserved_zalloc_count;
 978
 979 static vm_map_entry_t
 980 _vm_map_entry_create(
 981         struct vm_map_header    *map_header, boolean_t __unused map_locked)
 982 {
 983         zone_t  zone;
 984         vm_map_entry_t  entry;
 985
 986         zone = vm_map_entry_zone;
 987
 988         assert(map_header->entries_pageable ? !map_locked : TRUE);
 989
 990         if (map_header->entries_pageable) {
 991                 entry = (vm_map_entry_t) zalloc(zone);
 992         }
 993         else {
 994                 entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);
 995
 996                 if (entry == VM_MAP_ENTRY_NULL) {
 997                         zone = vm_map_entry_reserved_zone;
 998                         entry = (vm_map_entry_t) zalloc(zone);
 999                         OSAddAtomic(1, &reserved_zalloc_count);
1000                 } else
1001                         OSAddAtomic(1, &nonreserved_zalloc_count);
1002         }
1003
1004         if (entry == VM_MAP_ENTRY_NULL)
1005                 panic("vm_map_entry_create");
1006         entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
1007
1008         vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
1009 #if     MAP_ENTRY_CREATION_DEBUG
1010         entry->vme_creation_maphdr = map_header;
1011         fastbacktrace(&entry->vme_creation_bt[0],
1012                       (sizeof(entry->vme_creation_bt)/sizeof(uintptr_t)));
1013 #endif
1014         return(entry);
1015 }
1016
/*
 *      vm_map_entry_dispose:   [ internal use only ]
 *
 *      Inverse of vm_map_entry_create.
 *
 *      write map lock held so no need to
 *      do anything special to ensure correctness
 *      of the stores
 *
 *      vm_map_entry_dispose() takes a map and uses its hdr;
 *      vm_map_copy_entry_dispose() takes a map copy and uses its cpy_hdr.
 */
#define vm_map_entry_dispose(map, entry)                        \
        _vm_map_entry_dispose(&(map)->hdr, (entry))

/*
 * The first parameter is named "copy" so that the expansion uses the
 * macro argument rather than whatever variable called "copy" happens to
 * be in scope at the call site.
 */
#define vm_map_copy_entry_dispose(copy, entry) \
        _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
1031
1032 static void
1033 _vm_map_entry_dispose(
1034         register struct vm_map_header   *map_header,
1035         register vm_map_entry_t         entry)
1036 {
1037         register zone_t         zone;
1038
1039         if (map_header->entries_pageable || !(entry->from_reserved_zone))
1040                 zone = vm_map_entry_zone;
1041         else
1042                 zone = vm_map_entry_reserved_zone;
1043
1044         if (!map_header->entries_pageable) {
1045                 if (zone == vm_map_entry_zone)
1046                         OSAddAtomic(-1, &nonreserved_zalloc_count);
1047                 else
1048                         OSAddAtomic(-1, &reserved_zalloc_count);
1049         }
1050
1051         zfree(zone, entry);
1052 }
1053
1054 #if MACH_ASSERT
1055 static boolean_t first_free_check = FALSE;
1056 boolean_t
1057 first_free_is_valid(
1058         vm_map_t        map)
1059 {
1060         if (!first_free_check)
1061                 return TRUE;
1062
1063         return( first_free_is_valid_store( map ));
1064 }
1065 #endif /* MACH_ASSERT */
1066
1067
/*
 * Link / unlink an entry in a map copy's entry list: both forward to the
 * vm map store layer using the copy's header (cpy_hdr).
 */
#define vm_map_copy_entry_link(copy, after_where, entry)        \
        _vm_map_store_entry_link(&(copy)->cpy_hdr,              \
                                 after_where, (entry))

#define vm_map_copy_entry_unlink(copy, entry)                   \
        _vm_map_store_entry_unlink(&(copy)->cpy_hdr,            \
                                   (entry))
1073
1074 #if     MACH_ASSERT && TASK_SWAPPER
1075 /*
1076  *      vm_map_res_reference:
1077  *
1078  *      Adds another valid residence count to the given map.
1079  *
1080  *      Map is locked so this function can be called from
1081  *      vm_map_swapin.
1082  *
1083  */
1084 void vm_map_res_reference(register vm_map_t map)
1085 {
1086         /* assert map is locked */
1087         assert(map->res_count >= 0);
1088         assert(map->ref_count >= map->res_count);
1089         if (map->res_count == 0) {
1090                 lck_mtx_unlock(&map->s_lock);
1091                 vm_map_lock(map);
1092                 vm_map_swapin(map);
1093                 lck_mtx_lock(&map->s_lock);
1094                 ++map->res_count;
1095                 vm_map_unlock(map);
1096         } else
1097                 ++map->res_count;
1098 }
1099
1100 /*
1101  *      vm_map_reference_swap:
1102  *
1103  *      Adds valid reference and residence counts to the given map.
1104  *
1105  *      The map may not be in memory (i.e. zero residence count).
1106  *
1107  */
1108 void vm_map_reference_swap(register vm_map_t map)
1109 {
1110         assert(map != VM_MAP_NULL);
1111         lck_mtx_lock(&map->s_lock);
1112         assert(map->res_count >= 0);
1113         assert(map->ref_count >= map->res_count);
1114         map->ref_count++;
1115         vm_map_res_reference(map);
1116         lck_mtx_unlock(&map->s_lock);
1117 }
1118
1119 /*
1120  *      vm_map_res_deallocate:
1121  *
1122  *      Decrement residence count on a map; possibly causing swapout.
1123  *
1124  *      The map must be in memory (i.e. non-zero residence count).
1125  *
1126  *      The map is locked, so this function is callable from vm_map_deallocate.
1127  *
1128  */
1129 void vm_map_res_deallocate(register vm_map_t map)
1130 {
1131         assert(map->res_count > 0);
1132         if (--map->res_count == 0) {
1133                 lck_mtx_unlock(&map->s_lock);
1134                 vm_map_lock(map);
1135                 vm_map_swapout(map);
1136                 vm_map_unlock(map);
1137                 lck_mtx_lock(&map->s_lock);
1138         }
1139         assert(map->ref_count >= map->res_count);
1140 }
1141 #endif  /* MACH_ASSERT && TASK_SWAPPER */
1142
1143 /*
1144  *      vm_map_destroy:
1145  *
1146  *      Actually destroy a map.
1147  */
1148 void
1149 vm_map_destroy(
1150         vm_map_t        map,
1151         int             flags)
1152 {
1153         vm_map_lock(map);
1154
1155         /* final cleanup: no need to unnest shared region */
1156         flags |= VM_MAP_REMOVE_NO_UNNESTING;
1157
1158         /* clean up regular map entries */
1159         (void) vm_map_delete(map, map->min_offset, map->max_offset,
1160                              flags, VM_MAP_NULL);
1161         /* clean up leftover special mappings (commpage, etc...) */
1162         (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
1163                              flags, VM_MAP_NULL);
1164
1165 #if CONFIG_FREEZE
1166         if (map->default_freezer_handle) {
1167                 default_freezer_handle_deallocate(map->default_freezer_handle);
1168                 map->default_freezer_handle = NULL;
1169         }
1170 #endif
1171         vm_map_disable_hole_optimization(map);
1172         vm_map_unlock(map);
1173
1174         assert(map->hdr.nentries == 0);
1175
1176         if(map->pmap)
1177                 pmap_destroy(map->pmap);
1178
1179         zfree(vm_map_zone, map);
1180 }
1181
1182 #if     TASK_SWAPPER
1183 /*
1184  * vm_map_swapin/vm_map_swapout
1185  *
1186  * Swap a map in and out, either referencing or releasing its resources.
1187  * These functions are internal use only; however, they must be exported
1188  * because they may be called from macros, which are exported.
1189  *
1190  * In the case of swapout, there could be races on the residence count,
1191  * so if the residence count is up, we return, assuming that a
1192  * vm_map_deallocate() call in the near future will bring us back.
1193  *
1194  * Locking:
1195  *      -- We use the map write lock for synchronization among races.
1196  *      -- The map write lock, and not the simple s_lock, protects the
1197  *         swap state of the map.
1198  *      -- If a map entry is a share map, then we hold both locks, in
1199  *         hierarchical order.
1200  *
1201  * Synchronization Notes:
1202  *      1) If a vm_map_swapin() call happens while swapout in progress, it
1203  *      will block on the map lock and proceed when swapout is through.
1204  *      2) A vm_map_reference() call at this time is illegal, and will
1205  *      cause a panic.  vm_map_reference() is only allowed on resident
1206  *      maps, since it refuses to block.
1207  *      3) A vm_map_swapin() call during a swapin will block, and
 *      proceed when the first swapin is done, turning into a nop.
1209  *      This is the reason the res_count is not incremented until
1210  *      after the swapin is complete.
1211  *      4) There is a timing hole after the checks of the res_count, before
1212  *      the map lock is taken, during which a swapin may get the lock
1213  *      before a swapout about to happen.  If this happens, the swapin
1214  *      will detect the state and increment the reference count, causing
1215  *      the swapout to be a nop, thereby delaying it until a later
1216  *      vm_map_deallocate.  If the swapout gets the lock first, then
1217  *      the swapin will simply block until the swapout is done, and
1218  *      then proceed.
1219  *
1220  * Because vm_map_swapin() is potentially an expensive operation, it
1221  * should be used with caution.
1222  *
1223  * Invariants:
1224  *      1) A map with a residence count of zero is either swapped, or
1225  *         being swapped.
1226  *      2) A map with a non-zero residence count is either resident,
1227  *         or being swapped in.
1228  */
1229
1230 int vm_map_swap_enable = 1;
1231
1232 void vm_map_swapin (vm_map_t map)
1233 {
1234         register vm_map_entry_t entry;
1235
1236         if (!vm_map_swap_enable)        /* debug */
1237                 return;
1238
1239         /*
1240          * Map is locked
1241          * First deal with various races.
1242          */
1243         if (map->sw_state == MAP_SW_IN)
1244                 /*
1245                  * we raced with swapout and won.  Returning will incr.
1246                  * the res_count, turning the swapout into a nop.
1247                  */
1248                 return;
1249
1250         /*
1251          * The residence count must be zero.  If we raced with another
1252          * swapin, the state would have been IN; if we raced with a
1253          * swapout (after another competing swapin), we must have lost
1254          * the race to get here (see above comment), in which case
1255          * res_count is still 0.
1256          */
1257         assert(map->res_count == 0);
1258
1259         /*
1260          * There are no intermediate states of a map going out or
1261          * coming in, since the map is locked during the transition.
1262          */
1263         assert(map->sw_state == MAP_SW_OUT);
1264
1265         /*
1266          * We now operate upon each map entry.  If the entry is a sub-
1267          * or share-map, we call vm_map_res_reference upon it.
1268          * If the entry is an object, we call vm_object_res_reference
1269          * (this may iterate through the shadow chain).
1270          * Note that we hold the map locked the entire time,
1271          * even if we get back here via a recursive call in
1272          * vm_map_res_reference.
1273          */
1274         entry = vm_map_first_entry(map);
1275
1276         while (entry != vm_map_to_entry(map)) {
1277                 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1278                         if (entry->is_sub_map) {
1279                                 vm_map_t lmap = VME_SUBMAP(entry);
1280                                 lck_mtx_lock(&lmap->s_lock);
1281                                 vm_map_res_reference(lmap);
1282                                 lck_mtx_unlock(&lmap->s_lock);
1283                         } else {
1284                                 vm_object_t object = VME_OBEJCT(entry);
1285                                 vm_object_lock(object);
1286                                 /*
1287                                  * This call may iterate through the
1288                                  * shadow chain.
1289                                  */
1290                                 vm_object_res_reference(object);
1291                                 vm_object_unlock(object);
1292                         }
1293                 }
1294                 entry = entry->vme_next;
1295         }
1296         assert(map->sw_state == MAP_SW_OUT);
1297         map->sw_state = MAP_SW_IN;
1298 }
1299
1300 void vm_map_swapout(vm_map_t map)
1301 {
1302         register vm_map_entry_t entry;
1303
1304         /*
1305          * Map is locked
1306          * First deal with various races.
1307          * If we raced with a swapin and lost, the residence count
1308          * will have been incremented to 1, and we simply return.
1309          */
1310         lck_mtx_lock(&map->s_lock);
1311         if (map->res_count != 0) {
1312                 lck_mtx_unlock(&map->s_lock);
1313                 return;
1314         }
1315         lck_mtx_unlock(&map->s_lock);
1316
1317         /*
1318          * There are no intermediate states of a map going out or
1319          * coming in, since the map is locked during the transition.
1320          */
1321         assert(map->sw_state == MAP_SW_IN);
1322
1323         if (!vm_map_swap_enable)
1324                 return;
1325
1326         /*
1327          * We now operate upon each map entry.  If the entry is a sub-
1328          * or share-map, we call vm_map_res_deallocate upon it.
1329          * If the entry is an object, we call vm_object_res_deallocate
1330          * (this may iterate through the shadow chain).
1331          * Note that we hold the map locked the entire time,
1332          * even if we get back here via a recursive call in
1333          * vm_map_res_deallocate.
1334          */
1335         entry = vm_map_first_entry(map);
1336
1337         while (entry != vm_map_to_entry(map)) {
1338                 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1339                         if (entry->is_sub_map) {
1340                                 vm_map_t lmap = VME_SUBMAP(entry);
1341                                 lck_mtx_lock(&lmap->s_lock);
1342                                 vm_map_res_deallocate(lmap);
1343                                 lck_mtx_unlock(&lmap->s_lock);
1344                         } else {
1345                                 vm_object_t object = VME_OBJECT(entry);
1346                                 vm_object_lock(object);
1347                                 /*
1348                                  * This call may take a long time,
1349                                  * since it could actively push
1350                                  * out pages (if we implement it
1351                                  * that way).
1352                                  */
1353                                 vm_object_res_deallocate(object);
1354                                 vm_object_unlock(object);
1355                         }
1356                 }
1357                 entry = entry->vme_next;
1358         }
1359         assert(map->sw_state == MAP_SW_IN);
1360         map->sw_state = MAP_SW_OUT;
1361 }
1362
1363 #endif  /* TASK_SWAPPER */
1364
1365 /*
1366  *      vm_map_lookup_entry:    [ internal use only ]
1367  *
1368  *      Calls into the vm map store layer to find the map
1369  *      entry containing (or immediately preceding) the
1370  *      specified address in the given map; the entry is returned
1371  *      in the "entry" parameter.  The boolean
1372  *      result indicates whether the address is
1373  *      actually contained in the map.
1374  */
1375 boolean_t
1376 vm_map_lookup_entry(
1377         register vm_map_t               map,
1378         register vm_map_offset_t        address,
1379         vm_map_entry_t          *entry)         /* OUT */
1380 {
1381         return ( vm_map_store_lookup_entry( map, address, entry ));
1382 }
1383
1384 /*
1385  *      Routine:        vm_map_find_space
1386  *      Purpose:
1387  *              Allocate a range in the specified virtual address map,
1388  *              returning the entry allocated for that range.
1389  *              Used by kmem_alloc, etc.
1390  *
 *              The map must NOT be locked. It will be returned locked
1392  *              on KERN_SUCCESS, unlocked on failure.
1393  *
1394  *              If an entry is allocated, the object/offset fields
1395  *              are initialized to zero.
1396  */
1397 kern_return_t
1398 vm_map_find_space(
1399         register vm_map_t       map,
1400         vm_map_offset_t         *address,       /* OUT */
1401         vm_map_size_t           size,
1402         vm_map_offset_t         mask,
1403         int                     flags,
1404         vm_map_entry_t          *o_entry)       /* OUT */
1405 {
1406         vm_map_entry_t                  entry, new_entry;
1407         register vm_map_offset_t        start;
1408         register vm_map_offset_t        end;
1409         vm_map_entry_t                  hole_entry;
1410
1411         if (size == 0) {
1412                 *address = 0;
1413                 return KERN_INVALID_ARGUMENT;
1414         }
1415
1416         if (flags & VM_FLAGS_GUARD_AFTER) {
1417                 /* account for the back guard page in the size */
1418                 size += VM_MAP_PAGE_SIZE(map);
1419         }
1420
1421         new_entry = vm_map_entry_create(map, FALSE);
1422
1423         /*
1424          *      Look for the first possible address; if there's already
1425          *      something at this address, we have to start after it.
1426          */
1427
1428         vm_map_lock(map);
1429
1430         if( map->disable_vmentry_reuse == TRUE) {
1431                 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1432         } else {
1433                 if (map->holelistenabled) {
1434                         hole_entry = (vm_map_entry_t)map->holes_list;
1435
1436                         if (hole_entry == NULL) {
1437                                 /*
1438                                  * No more space in the map?
1439                                  */
1440                                 vm_map_entry_dispose(map, new_entry);
1441                                 vm_map_unlock(map);
1442                                 return(KERN_NO_SPACE);
1443                         }
1444
1445                         entry = hole_entry;
1446                         start = entry->vme_start;
1447                 } else {
1448                         assert(first_free_is_valid(map));
1449                         if ((entry = map->first_free) == vm_map_to_entry(map))
1450                                 start = map->min_offset;
1451                         else
1452                                 start = entry->vme_end;
1453                 }
1454         }
1455
1456         /*
1457          *      In any case, the "entry" always precedes
1458          *      the proposed new region throughout the loop:
1459          */
1460
1461         while (TRUE) {
1462                 register vm_map_entry_t next;
1463
1464                 /*
1465                  *      Find the end of the proposed new region.
1466                  *      Be sure we didn't go beyond the end, or
1467                  *      wrap around the address.
1468                  */
1469
1470                 if (flags & VM_FLAGS_GUARD_BEFORE) {
1471                         /* reserve space for the front guard page */
1472                         start += VM_MAP_PAGE_SIZE(map);
1473                 }
1474                 end = ((start + mask) & ~mask);
1475
1476                 if (end < start) {
1477                         vm_map_entry_dispose(map, new_entry);
1478                         vm_map_unlock(map);
1479                         return(KERN_NO_SPACE);
1480                 }
1481                 start = end;
1482                 end += size;
1483
1484                 if ((end > map->max_offset) || (end < start)) {
1485                         vm_map_entry_dispose(map, new_entry);
1486                         vm_map_unlock(map);
1487                         return(KERN_NO_SPACE);
1488                 }
1489
1490                 next = entry->vme_next;
1491
1492                 if (map->holelistenabled) {
1493                         if (entry->vme_end >= end)
1494                                 break;
1495                 } else {
1496                         /*
1497                          *      If there are no more entries, we must win.
1498                          *
1499                          *      OR
1500                          *
1501                          *      If there is another entry, it must be
1502                          *      after the end of the potential new region.
1503                          */
1504
1505                         if (next == vm_map_to_entry(map))
1506                                 break;
1507
1508                         if (next->vme_start >= end)
1509                                 break;
1510                 }
1511
1512                 /*
1513                  *      Didn't fit -- move to the next entry.
1514                  */
1515
1516                 entry = next;
1517
1518                 if (map->holelistenabled) {
1519                         if (entry == (vm_map_entry_t) map->holes_list) {
1520                                 /*
1521                                  * Wrapped around
1522                                  */
1523                                 vm_map_entry_dispose(map, new_entry);
1524                                 vm_map_unlock(map);
1525                                 return(KERN_NO_SPACE);
1526                         }
1527                         start = entry->vme_start;
1528                 } else {
1529                         start = entry->vme_end;
1530                 }
1531         }
1532
1533         if (map->holelistenabled) {
1534                 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
1535                         panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
1536                 }
1537         }
1538
1539         /*
1540          *      At this point,
1541          *              "start" and "end" should define the endpoints of the
1542          *                      available new range, and
1543          *              "entry" should refer to the region before the new
1544          *                      range, and
1545          *
1546          *              the map should be locked.
1547          */
1548
1549         if (flags & VM_FLAGS_GUARD_BEFORE) {
1550                 /* go back for the front guard page */
1551                 start -= VM_MAP_PAGE_SIZE(map);
1552         }
1553         *address = start;
1554
1555         assert(start < end);
1556         new_entry->vme_start = start;
1557         new_entry->vme_end = end;
1558         assert(page_aligned(new_entry->vme_start));
1559         assert(page_aligned(new_entry->vme_end));
1560         assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
1561                                    VM_MAP_PAGE_MASK(map)));
1562         assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
1563                                    VM_MAP_PAGE_MASK(map)));
1564
1565         new_entry->is_shared = FALSE;
1566         new_entry->is_sub_map = FALSE;
1567         new_entry->use_pmap = TRUE;
1568         VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
1569         VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);
1570
1571         new_entry->needs_copy = FALSE;
1572
1573         new_entry->inheritance = VM_INHERIT_DEFAULT;
1574         new_entry->protection = VM_PROT_DEFAULT;
1575         new_entry->max_protection = VM_PROT_ALL;
1576         new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1577         new_entry->wired_count = 0;
1578         new_entry->user_wired_count = 0;
1579
1580         new_entry->in_transition = FALSE;
1581         new_entry->needs_wakeup = FALSE;
1582         new_entry->no_cache = FALSE;
1583         new_entry->permanent = FALSE;
1584         new_entry->superpage_size = FALSE;
1585         if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
1586                 new_entry->map_aligned = TRUE;
1587         } else {
1588                 new_entry->map_aligned = FALSE;
1589         }
1590
1591         new_entry->used_for_jit = FALSE;
1592         new_entry->zero_wired_pages = FALSE;
1593         new_entry->iokit_acct = FALSE;
1594         new_entry->vme_resilient_codesign = FALSE;
1595         new_entry->vme_resilient_media = FALSE;
1596
1597         int alias;
1598         VM_GET_FLAGS_ALIAS(flags, alias);
1599         VME_ALIAS_SET(new_entry, alias);
1600
1601         /*
1602          *      Insert the new entry into the list
1603          */
1604
1605         vm_map_store_entry_link(map, entry, new_entry);
1606
1607         map->size += size;
1608
1609         /*
1610          *      Update the lookup hint
1611          */
1612         SAVE_HINT_MAP_WRITE(map, new_entry);
1613
1614         *o_entry = new_entry;
1615         return(KERN_SUCCESS);
1616 }
1617
1618 int vm_map_pmap_enter_print = FALSE;    /* debug knob: when non-zero, vm_map_pmap_enter() printf()s the map, address, object and offset of each page it is about to enter */
1619 int vm_map_pmap_enter_enable = FALSE;   /* NOTE(review): not referenced in the code visible here; presumably gates whether vm_map_pmap_enter() is called at all -- TODO confirm against callers */
1620
1621 /*
1622  *      Routine:        vm_map_pmap_enter [internal only]
1623  *
1624  *      Description:
1625  *              Force pages from the specified object to be entered into
1626  *              the pmap at the specified address if they are present.
1627  *              As soon as a page not found in the object the scan ends.
1628  *
1629  *      Returns:
1630  *              Nothing.
1631  *
1632  *      In/out conditions:
1633  *              The source map should not be locked on entry.
1634  */
1635 __unused static void
1636 vm_map_pmap_enter(
1637         vm_map_t                map,
1638         register vm_map_offset_t        addr,
1639         register vm_map_offset_t        end_addr,
1640         register vm_object_t    object,
1641         vm_object_offset_t      offset,
1642         vm_prot_t               protection)
1643 {
1644         int                     type_of_fault;
1645         kern_return_t           kr;
1646
1647         if(map->pmap == 0)
1648                 return;
1649
1650         while (addr < end_addr) {
1651                 register vm_page_t      m;
1652
1653
1654                 /*
1655                  * TODO:
1656                  * From vm_map_enter(), we come into this function without the map
1657                  * lock held or the object lock held.
1658                  * We haven't taken a reference on the object either.
1659                  * We should do a proper lookup on the map to make sure
1660                  * that things are sane before we go locking objects that
1661                  * could have been deallocated from under us.
1662                  */
1663
1664                 vm_object_lock(object);
1665
1666                 m = vm_page_lookup(object, offset);
1667                 /*
1668                  * ENCRYPTED SWAP:
1669                  * The user should never see encrypted data, so do not
1670                  * enter an encrypted page in the page table.
1671                  */
1672                 if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
1673                     m->fictitious ||
1674                     (m->unusual && ( m->error || m->restart || m->absent))) {
1675                         vm_object_unlock(object);
1676                         return;
1677                 }
1678
1679                 if (vm_map_pmap_enter_print) {
1680                         printf("vm_map_pmap_enter:");
1681                         printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1682                                map, (unsigned long long)addr, object, (unsigned long long)offset);
1683                 }
1684                 type_of_fault = DBG_CACHE_HIT_FAULT;
1685                 kr = vm_fault_enter(m, map->pmap, addr, protection, protection,
1686                                     VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
1687                                     0, /* XXX need user tag / alias? */
1688                                     0, /* alternate accounting? */
1689                                     NULL,
1690                                     &type_of_fault);
1691
1692                 vm_object_unlock(object);
1693
1694                 offset += PAGE_SIZE_64;
1695                 addr += PAGE_SIZE;
1696         }
1697 }
1698
1699 boolean_t vm_map_pmap_is_empty(
1700         vm_map_t        map,
1701         vm_map_offset_t start,
1702         vm_map_offset_t end);
1703 boolean_t vm_map_pmap_is_empty(
1704         vm_map_t        map,
1705         vm_map_offset_t start,
1706         vm_map_offset_t end)
1707 {
1708 #ifdef MACHINE_PMAP_IS_EMPTY
1709         return pmap_is_empty(map->pmap, start, end);
1710 #else   /* MACHINE_PMAP_IS_EMPTY */
1711         vm_map_offset_t offset;
1712         ppnum_t         phys_page;
1713
1714         if (map->pmap == NULL) {
1715                 return TRUE;
1716         }
1717
1718         for (offset = start;
1719              offset < end;
1720              offset += PAGE_SIZE) {
1721                 phys_page = pmap_find_phys(map->pmap, offset);
1722                 if (phys_page) {
1723                         kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1724                                 "page %d at 0x%llx\n",
1725                                 map, (long long)start, (long long)end,
1726                                 phys_page, (long long)offset);
1727                         return FALSE;
1728                 }
1729         }
1730         return TRUE;
1731 #endif  /* MACHINE_PMAP_IS_EMPTY */
1732 }
1733
1734 #define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
1735 kern_return_t
1736 vm_map_random_address_for_size(
1737         vm_map_t        map,
1738         vm_map_offset_t *address,
1739         vm_map_size_t   size)
1740 {
1741         kern_return_t   kr = KERN_SUCCESS;
1742         int             tries = 0;
1743         vm_map_offset_t random_addr = 0;
1744         vm_map_offset_t hole_end;
1745
1746         vm_map_entry_t  next_entry = VM_MAP_ENTRY_NULL;
1747         vm_map_entry_t  prev_entry = VM_MAP_ENTRY_NULL;
1748         vm_map_size_t   vm_hole_size = 0;
1749         vm_map_size_t   addr_space_size;
1750
1751         addr_space_size = vm_map_max(map) - vm_map_min(map);
1752
1753         assert(page_aligned(size));
1754
1755         while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1756                 random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
1757                 random_addr = vm_map_trunc_page(
1758                         vm_map_min(map) +(random_addr % addr_space_size),
1759                         VM_MAP_PAGE_MASK(map));
1760
1761                 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
1762                         if (prev_entry == vm_map_to_entry(map)) {
1763                                 next_entry = vm_map_first_entry(map);
1764                         } else {
1765                                 next_entry = prev_entry->vme_next;
1766                         }
1767                         if (next_entry == vm_map_to_entry(map)) {
1768                                 hole_end = vm_map_max(map);
1769                         } else {
1770                                 hole_end = next_entry->vme_start;
1771                         }
1772                         vm_hole_size = hole_end - random_addr;
1773                         if (vm_hole_size >= size) {
1774                                 *address = random_addr;
1775                                 break;
1776                         }
1777                 }
1778                 tries++;
1779         }
1780
1781         if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1782                 kr = KERN_NO_SPACE;
1783         }
1784         return kr;
1785 }
1786
1787 /*
1788  *      Routine:        vm_map_enter
1789  *
1790  *      Description:
1791  *              Allocate a range in the specified virtual address map.
1792  *              The resulting range will refer to memory defined by
1793  *              the given memory object and offset into that object.
1794  *
1795  *              Arguments are as defined in the vm_map call.
1796  */
1797 int _map_enter_debug = 0;
1798 static unsigned int vm_map_enter_restore_successes = 0;
1799 static unsigned int vm_map_enter_restore_failures = 0;
1800 kern_return_t
1801 vm_map_enter(
1802         vm_map_t                map,
1803         vm_map_offset_t         *address,       /* IN/OUT */
1804         vm_map_size_t           size,
1805         vm_map_offset_t         mask,
1806         int                     flags,
1807         vm_object_t             object,
1808         vm_object_offset_t      offset,
1809         boolean_t               needs_copy,
1810         vm_prot_t               cur_protection,
1811         vm_prot_t               max_protection,
1812         vm_inherit_t            inheritance)
1813 {
1814         vm_map_entry_t          entry, new_entry;
1815         vm_map_offset_t         start, tmp_start, tmp_offset;
1816         vm_map_offset_t         end, tmp_end;
1817         vm_map_offset_t         tmp2_start, tmp2_end;
1818         vm_map_offset_t         step;
1819         kern_return_t           result = KERN_SUCCESS;
1820         vm_map_t                zap_old_map = VM_MAP_NULL;
1821         vm_map_t                zap_new_map = VM_MAP_NULL;
1822         boolean_t               map_locked = FALSE;
1823         boolean_t               pmap_empty = TRUE;
1824         boolean_t               new_mapping_established = FALSE;
1825         boolean_t               keep_map_locked = ((flags & VM_FLAGS_KEEP_MAP_LOCKED) != 0);
1826         boolean_t               anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1827         boolean_t               purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1828         boolean_t               overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
1829         boolean_t               no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
1830         boolean_t               is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
1831         boolean_t               permanent = ((flags & VM_FLAGS_PERMANENT) != 0);
1832         boolean_t               entry_for_jit = ((flags & VM_FLAGS_MAP_JIT) != 0);
1833         boolean_t               iokit_acct = ((flags & VM_FLAGS_IOKIT_ACCT) != 0);
1834         boolean_t               resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
1835         boolean_t               resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
1836         unsigned int            superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
1837         vm_tag_t                alias, user_alias;
1838         vm_map_offset_t         effective_min_offset, effective_max_offset;
1839         kern_return_t           kr;
1840         boolean_t               clear_map_aligned = FALSE;
1841         vm_map_entry_t          hole_entry;
1842
1843         if (superpage_size) {
1844                 switch (superpage_size) {
1845                         /*
1846                          * Note that the current implementation only supports
1847                          * a single size for superpages, SUPERPAGE_SIZE, per
1848                          * architecture. As soon as more sizes are supposed
1849                          * to be supported, SUPERPAGE_SIZE has to be replaced
1850                          * with a lookup of the size depending on superpage_size.
1851                          */
1852 #ifdef __x86_64__
1853                         case SUPERPAGE_SIZE_ANY:
1854                                 /* handle it like 2 MB and round up to page size */
1855                                 size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
1856                         case SUPERPAGE_SIZE_2MB:
1857                                 break;
1858 #endif
1859                         default:
1860                                 return KERN_INVALID_ARGUMENT;
1861                 }
1862                 mask = SUPERPAGE_SIZE-1;
1863                 if (size & (SUPERPAGE_SIZE-1))
1864                         return KERN_INVALID_ARGUMENT;
1865                 inheritance = VM_INHERIT_NONE;  /* fork() children won't inherit superpages */
1866         }
1867
1868
1869
1870         if (resilient_codesign || resilient_media) {
1871                 if ((cur_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ||
1872                     (max_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
1873                         return KERN_PROTECTION_FAILURE;
1874                 }
1875         }
1876
1877         if (is_submap) {
1878                 if (purgable) {
1879                         /* submaps can not be purgeable */
1880                         return KERN_INVALID_ARGUMENT;
1881                 }
1882                 if (object == VM_OBJECT_NULL) {
1883                         /* submaps can not be created lazily */
1884                         return KERN_INVALID_ARGUMENT;
1885                 }
1886         }
1887         if (flags & VM_FLAGS_ALREADY) {
1888                 /*
1889                  * VM_FLAGS_ALREADY says that it's OK if the same mapping
1890                  * is already present.  For it to be meaningful, the requested
1891                  * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
1892                  * we shouldn't try and remove what was mapped there first
1893                  * (!VM_FLAGS_OVERWRITE).
1894                  */
1895                 if ((flags & VM_FLAGS_ANYWHERE) ||
1896                     (flags & VM_FLAGS_OVERWRITE)) {
1897                         return KERN_INVALID_ARGUMENT;
1898                 }
1899         }
1900
1901         effective_min_offset = map->min_offset;
1902
1903         if (flags & VM_FLAGS_BEYOND_MAX) {
1904                 /*
1905                  * Allow an insertion beyond the map's max offset.
1906                  */
1907                 if (vm_map_is_64bit(map))
1908                         effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
1909                 else
1910                         effective_max_offset = 0x00000000FFFFF000ULL;
1911         } else {
1912                 effective_max_offset = map->max_offset;
1913         }
1914
1915         if (size == 0 ||
1916             (offset & PAGE_MASK_64) != 0) {
1917                 *address = 0;
1918                 return KERN_INVALID_ARGUMENT;
1919         }
1920
1921         VM_GET_FLAGS_ALIAS(flags, alias);
1922         if (map->pmap == kernel_pmap) {
1923                 user_alias = VM_KERN_MEMORY_NONE;
1924         } else {
1925                 user_alias = alias;
1926         }
1927
1928 #define RETURN(value)   { result = value; goto BailOut; }
1929
1930         assert(page_aligned(*address));
1931         assert(page_aligned(size));
1932
1933         if (!VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
1934                 /*
1935                  * In most cases, the caller rounds the size up to the
1936                  * map's page size.
1937                  * If we get a size that is explicitly not map-aligned here,
1938                  * we'll have to respect the caller's wish and mark the
1939                  * mapping as "not map-aligned" to avoid tripping the
1940                  * map alignment checks later.
1941                  */
1942                 clear_map_aligned = TRUE;
1943         }
1944         if (!anywhere &&
1945             !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
1946                 /*
1947                  * We've been asked to map at a fixed address and that
1948                  * address is not aligned to the map's specific alignment.
1949                  * The caller should know what it's doing (i.e. most likely
1950                  * mapping some fragmented copy map, transferring memory from
1951                  * a VM map with a different alignment), so clear map_aligned
1952                  * for this new VM map entry and proceed.
1953                  */
1954                 clear_map_aligned = TRUE;
1955         }
1956
1957         /*
1958          * Only zero-fill objects are allowed to be purgable.
1959          * LP64todo - limit purgable objects to 32-bits for now
1960          */
1961         if (purgable &&
1962             (offset != 0 ||
1963              (object != VM_OBJECT_NULL &&
1964               (object->vo_size != size ||
1965                object->purgable == VM_PURGABLE_DENY))
1966              || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
1967                 return KERN_INVALID_ARGUMENT;
1968
1969         if (!anywhere && overwrite) {
1970                 /*
1971                  * Create a temporary VM map to hold the old mappings in the
1972                  * affected area while we create the new one.
1973                  * This avoids releasing the VM map lock in
1974                  * vm_map_entry_delete() and allows atomicity
1975                  * when we want to replace some mappings with a new one.
1976                  * It also allows us to restore the old VM mappings if the
1977                  * new mapping fails.
1978                  */
1979                 zap_old_map = vm_map_create(PMAP_NULL,
1980                                             *address,
1981                                             *address + size,
1982                                             map->hdr.entries_pageable);
1983                 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
1984                 vm_map_disable_hole_optimization(zap_old_map);
1985         }
1986
1987 StartAgain: ;
1988
1989         start = *address;
1990
1991         if (anywhere) {
1992                 vm_map_lock(map);
1993                 map_locked = TRUE;
1994
1995                 if (entry_for_jit) {
1996                         if (map->jit_entry_exists) {
1997                                 result = KERN_INVALID_ARGUMENT;
1998                                 goto BailOut;
1999                         }
2000                         /*
2001                          * Get a random start address.
2002                          */
2003                         result = vm_map_random_address_for_size(map, address, size);
2004                         if (result != KERN_SUCCESS) {
2005                                 goto BailOut;
2006                         }
2007                         start = *address;
2008                 }
2009
2010
2011                 /*
2012                  *      Calculate the first possible address.
2013                  */
2014
2015                 if (start < effective_min_offset)
2016                         start = effective_min_offset;
2017                 if (start > effective_max_offset)
2018                         RETURN(KERN_NO_SPACE);
2019
2020                 /*
2021                  *      Look for the first possible address;
2022                  *      if there's already something at this
2023                  *      address, we have to start after it.
2024                  */
2025
2026                 if( map->disable_vmentry_reuse == TRUE) {
2027                         VM_MAP_HIGHEST_ENTRY(map, entry, start);
2028                 } else {
2029
2030                         if (map->holelistenabled) {
2031                                 hole_entry = (vm_map_entry_t)map->holes_list;
2032
2033                                 if (hole_entry == NULL) {
2034                                         /*
2035                                          * No more space in the map?
2036                                          */
2037                                         result = KERN_NO_SPACE;
2038                                         goto BailOut;
2039                                 } else {
2040
2041                                         boolean_t found_hole = FALSE;
2042
2043                                         do {
2044                                                 if (hole_entry->vme_start >= start) {
2045                                                         start = hole_entry->vme_start;
2046                                                         found_hole = TRUE;
2047                                                         break;
2048                                                 }
2049
2050                                                 if (hole_entry->vme_end > start) {
2051                                                         found_hole = TRUE;
2052                                                         break;
2053                                                 }
2054                                                 hole_entry = hole_entry->vme_next;
2055
2056                                         } while (hole_entry != (vm_map_entry_t) map->holes_list);
2057
2058                                         if (found_hole == FALSE) {
2059                                                 result = KERN_NO_SPACE;
2060                                                 goto BailOut;
2061                                         }
2062
2063                                         entry = hole_entry;
2064
2065                                         if (start == 0)
2066                                                 start += PAGE_SIZE_64;
2067                                 }
2068                         } else {
2069                                 assert(first_free_is_valid(map));
2070
2071                                 entry = map->first_free;
2072
2073                                 if (entry == vm_map_to_entry(map)) {
2074                                         entry = NULL;
2075                                 } else {
2076                                        if (entry->vme_next == vm_map_to_entry(map)){
2077                                                /*
2078                                                 * Hole at the end of the map.
2079                                                 */
2080                                                 entry = NULL;
2081                                        } else {
2082                                                 if (start < (entry->vme_next)->vme_start ) {
2083                                                         start = entry->vme_end;
2084                                                         start = vm_map_round_page(start,
2085                                                                                   VM_MAP_PAGE_MASK(map));
2086                                                 } else {
2087                                                         /*
2088                                                          * Need to do a lookup.
2089                                                          */
2090                                                         entry = NULL;
2091                                                 }
2092                                        }
2093                                 }
2094
2095                                 if (entry == NULL) {
2096                                         vm_map_entry_t  tmp_entry;
2097                                         if (vm_map_lookup_entry(map, start, &tmp_entry)) {
2098                                                 assert(!entry_for_jit);
2099                                                 start = tmp_entry->vme_end;
2100                                                 start = vm_map_round_page(start,
2101                                                                           VM_MAP_PAGE_MASK(map));
2102                                         }
2103                                         entry = tmp_entry;
2104                                 }
2105                         }
2106                 }
2107
2108                 /*
2109                  *      In any case, the "entry" always precedes
2110                  *      the proposed new region throughout the
2111                  *      loop:
2112                  */
2113
2114                 while (TRUE) {
2115                         register vm_map_entry_t next;
2116
2117                         /*
2118                          *      Find the end of the proposed new region.
2119                          *      Be sure we didn't go beyond the end, or
2120                          *      wrap around the address.
2121                          */
2122
2123                         end = ((start + mask) & ~mask);
2124                         end = vm_map_round_page(end,
2125                                                 VM_MAP_PAGE_MASK(map));
2126                         if (end < start)
2127                                 RETURN(KERN_NO_SPACE);
2128                         start = end;
2129                         assert(VM_MAP_PAGE_ALIGNED(start,
2130                                                    VM_MAP_PAGE_MASK(map)));
2131                         end += size;
2132
2133                         if ((end > effective_max_offset) || (end < start)) {
2134                                 if (map->wait_for_space) {
2135                                         assert(!keep_map_locked);
2136                                         if (size <= (effective_max_offset -
2137                                                      effective_min_offset)) {
2138                                                 assert_wait((event_t)map,
2139                                                             THREAD_ABORTSAFE);
2140                                                 vm_map_unlock(map);
2141                                                 map_locked = FALSE;
2142                                                 thread_block(THREAD_CONTINUE_NULL);
2143                                                 goto StartAgain;
2144                                         }
2145                                 }
2146                                 RETURN(KERN_NO_SPACE);
2147                         }
2148
2149                         next = entry->vme_next;
2150
2151                         if (map->holelistenabled) {
2152                                 if (entry->vme_end >= end)
2153                                         break;
2154                         } else {
2155                                 /*
2156                                  *      If there are no more entries, we must win.
2157                                  *
2158                                  *      OR
2159                                  *
2160                                  *      If there is another entry, it must be
2161                                  *      after the end of the potential new region.
2162                                  */
2163
2164                                 if (next == vm_map_to_entry(map))
2165                                         break;
2166
2167                                 if (next->vme_start >= end)
2168                                         break;
2169                         }
2170
2171                         /*
2172                          *      Didn't fit -- move to the next entry.
2173                          */
2174
2175                         entry = next;
2176
2177                         if (map->holelistenabled) {
2178                                 if (entry == (vm_map_entry_t) map->holes_list) {
2179                                         /*
2180                                          * Wrapped around
2181                                          */
2182                                         result = KERN_NO_SPACE;
2183                                         goto BailOut;
2184                                 }
2185                                 start = entry->vme_start;
2186                         } else {
2187                                 start = entry->vme_end;
2188                         }
2189
2190                         start = vm_map_round_page(start,
2191                                                   VM_MAP_PAGE_MASK(map));
2192                 }
2193
2194                 if (map->holelistenabled) {
2195                         if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
2196                                 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
2197                         }
2198                 }
2199
2200                 *address = start;
2201                 assert(VM_MAP_PAGE_ALIGNED(*address,
2202                                            VM_MAP_PAGE_MASK(map)));
2203         } else {
2204                 /*
2205                  *      Verify that:
2206                  *              the address doesn't itself violate
2207                  *              the mask requirement.
2208                  */
2209
2210                 vm_map_lock(map);
2211                 map_locked = TRUE;
2212                 if ((start & mask) != 0)
2213                         RETURN(KERN_NO_SPACE);
2214
2215                 /*
2216                  *      ...     the address is within bounds
2217                  */
2218
2219                 end = start + size;
2220
2221                 if ((start < effective_min_offset) ||
2222                     (end > effective_max_offset) ||
2223                     (start >= end)) {
2224                         RETURN(KERN_INVALID_ADDRESS);
2225                 }
2226
2227                 if (overwrite && zap_old_map != VM_MAP_NULL) {
2228                         /*
2229                          * Fixed mapping and "overwrite" flag: attempt to
2230                          * remove all existing mappings in the specified
2231                          * address range, saving them in our "zap_old_map".
2232                          */
2233                         (void) vm_map_delete(map, start, end,
2234                                              (VM_MAP_REMOVE_SAVE_ENTRIES |
2235                                               VM_MAP_REMOVE_NO_MAP_ALIGN),
2236                                              zap_old_map);
2237                 }
2238
2239                 /*
2240                  *      ...     the starting address isn't allocated
2241                  */
2242
2243                 if (vm_map_lookup_entry(map, start, &entry)) {
2244                         if (! (flags & VM_FLAGS_ALREADY)) {
2245                                 RETURN(KERN_NO_SPACE);
2246                         }
2247                         /*
2248                          * Check if what's already there is what we want.
2249                          */
2250                         tmp_start = start;
2251                         tmp_offset = offset;
2252                         if (entry->vme_start < start) {
2253                                 tmp_start -= start - entry->vme_start;
2254                                 tmp_offset -= start - entry->vme_start;
2255
2256                         }
2257                         for (; entry->vme_start < end;
2258                              entry = entry->vme_next) {
2259                                 /*
2260                                  * Check if the mapping's attributes
2261                                  * match the existing map entry.
2262                                  */
2263                                 if (entry == vm_map_to_entry(map) ||
2264                                     entry->vme_start != tmp_start ||
2265                                     entry->is_sub_map != is_submap ||
2266                                     VME_OFFSET(entry) != tmp_offset ||
2267                                     entry->needs_copy != needs_copy ||
2268                                     entry->protection != cur_protection ||
2269                                     entry->max_protection != max_protection ||
2270                                     entry->inheritance != inheritance ||
2271                                     entry->iokit_acct != iokit_acct ||
2272                                     VME_ALIAS(entry) != alias) {
2273                                         /* not the same mapping ! */
2274                                         RETURN(KERN_NO_SPACE);
2275                                 }
2276                                 /*
2277                                  * Check if the same object is being mapped.
2278                                  */
2279                                 if (is_submap) {
2280                                         if (VME_SUBMAP(entry) !=
2281                                             (vm_map_t) object) {
2282                                                 /* not the same submap */
2283                                                 RETURN(KERN_NO_SPACE);
2284                                         }
2285                                 } else {
2286                                         if (VME_OBJECT(entry) != object) {
2287                                                 /* not the same VM object... */
2288                                                 vm_object_t obj2;
2289
2290                                                 obj2 = VME_OBJECT(entry);
2291                                                 if ((obj2 == VM_OBJECT_NULL ||
2292                                                      obj2->internal) &&
2293                                                     (object == VM_OBJECT_NULL ||
2294                                                      object->internal)) {
2295                                                         /*
2296                                                          * ... but both are
2297                                                          * anonymous memory,
2298                                                          * so equivalent.
2299                                                          */
2300                                                 } else {
2301                                                         RETURN(KERN_NO_SPACE);
2302                                                 }
2303                                         }
2304                                 }
2305
2306                                 tmp_offset += entry->vme_end - entry->vme_start;
2307                                 tmp_start += entry->vme_end - entry->vme_start;
2308                                 if (entry->vme_end >= end) {
2309                                         /* reached the end of our mapping */
2310                                         break;
2311                                 }
2312                         }
2313                         /* it all matches:  let's use what's already there ! */
2314                         RETURN(KERN_MEMORY_PRESENT);
2315                 }
2316
2317                 /*
2318                  *      ...     the next region doesn't overlap the
2319                  *              end point.
2320                  */
2321
2322                 if ((entry->vme_next != vm_map_to_entry(map)) &&
2323                     (entry->vme_next->vme_start < end))
2324                         RETURN(KERN_NO_SPACE);
2325         }
2326
2327         /*
2328          *      At this point,
2329          *              "start" and "end" should define the endpoints of the
2330          *                      available new range, and
2331          *              "entry" should refer to the region before the new
2332          *                      range, and
2333          *
2334          *              the map should be locked.
2335          */
2336
2337         /*
2338          *      See whether we can avoid creating a new entry (and object) by
2339          *      extending one of our neighbors.  [So far, we only attempt to
2340          *      extend from below.]  Note that we can never extend/join
2341          *      purgable objects because they need to remain distinct
2342          *      entities in order to implement their "volatile object"
2343          *      semantics.
2344          */
2345
2346         if (purgable || entry_for_jit) {
2347                 if (object == VM_OBJECT_NULL) {
2348
2349                         object = vm_object_allocate(size);
2350                         object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
2351                         object->true_share = TRUE;
2352                         if (purgable) {
2353                                 task_t owner;
2354                                 object->purgable = VM_PURGABLE_NONVOLATILE;
2355                                 if (map->pmap == kernel_pmap) {
2356                                         /*
2357                                          * Purgeable mappings made in a kernel
2358                                          * map are "owned" by the kernel itself
2359                                          * rather than the current user task
2360                                          * because they're likely to be used by
2361                                          * more than this user task (see
2362                                          * execargs_purgeable_allocate(), for
2363                                          * example).
2364                                          */
2365                                         owner = kernel_task;
2366                                 } else {
2367                                         owner = current_task();
2368                                 }
2369                                 assert(object->vo_purgeable_owner == NULL);
2370                                 assert(object->resident_page_count == 0);
2371                                 assert(object->wired_page_count == 0);
2372                                 vm_object_lock(object);
2373                                 vm_purgeable_nonvolatile_enqueue(object, owner);
2374                                 vm_object_unlock(object);
2375                         }
2376                         offset = (vm_object_offset_t)0;
2377                 }
2378         } else if ((is_submap == FALSE) &&
2379                    (object == VM_OBJECT_NULL) &&
2380                    (entry != vm_map_to_entry(map)) &&
2381                    (entry->vme_end == start) &&
2382                    (!entry->is_shared) &&
2383                    (!entry->is_sub_map) &&
2384                    (!entry->in_transition) &&
2385                    (!entry->needs_wakeup) &&
2386                    (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2387                    (entry->protection == cur_protection) &&
2388                    (entry->max_protection == max_protection) &&
2389                    (entry->inheritance == inheritance) &&
2390                    ((user_alias == VM_MEMORY_REALLOC) ||
2391                     (VME_ALIAS(entry) == alias)) &&
2392                    (entry->no_cache == no_cache) &&
2393                    (entry->permanent == permanent) &&
2394                    (!entry->superpage_size && !superpage_size) &&
2395                    /*
2396                     * No coalescing if not map-aligned, to avoid propagating
2397                     * that condition any further than needed:
2398                     */
2399                    (!entry->map_aligned || !clear_map_aligned) &&
2400                    (!entry->zero_wired_pages) &&
2401                    (!entry->used_for_jit && !entry_for_jit) &&
2402                    (entry->iokit_acct == iokit_acct) &&
2403                    (!entry->vme_resilient_codesign) &&
2404                    (!entry->vme_resilient_media) &&
2405
2406                    ((entry->vme_end - entry->vme_start) + size <=
2407                     (user_alias == VM_MEMORY_REALLOC ?
2408                      ANON_CHUNK_SIZE :
2409                      NO_COALESCE_LIMIT)) &&
2410
2411                    (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
2412                 if (vm_object_coalesce(VME_OBJECT(entry),
2413                                        VM_OBJECT_NULL,
2414                                        VME_OFFSET(entry),
2415                                        (vm_object_offset_t) 0,
2416                                        (vm_map_size_t)(entry->vme_end - entry->vme_start),
2417                                        (vm_map_size_t)(end - entry->vme_end))) {
2418
2419                         /*
2420                          *      Coalesced the two objects - can extend
2421                          *      the previous map entry to include the
2422                          *      new range.
2423                          */
2424                         map->size += (end - entry->vme_end);
2425                         assert(entry->vme_start < end);
2426                         assert(VM_MAP_PAGE_ALIGNED(end,
2427                                                    VM_MAP_PAGE_MASK(map)));
2428                         if (__improbable(vm_debug_events))
2429                                 DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
2430                         entry->vme_end = end;
2431                         if (map->holelistenabled) {
2432                                 vm_map_store_update_first_free(map, entry, TRUE);
2433                         } else {
2434                                 vm_map_store_update_first_free(map, map->first_free, TRUE);
2435                         }
2436                         new_mapping_established = TRUE;
2437                         RETURN(KERN_SUCCESS);
2438                 }
2439         }
2440
2441         step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2442         new_entry = NULL;
2443
2444         for (tmp2_start = start; tmp2_start<end; tmp2_start += step) {
2445                 tmp2_end = tmp2_start + step;
2446                 /*
2447                  *      Create a new entry
2448                  *      LP64todo - for now, we can only allocate 4GB internal objects
2449                  *      because the default pager can't page bigger ones.  Remove this
2450                  *      when it can.
2451                  *
2452                  * XXX FBDP
2453                  * The reserved "page zero" in each process's address space can
2454                  * be arbitrarily large.  Splitting it into separate 4GB objects and
2455                  * therefore different VM map entries serves no purpose and just
2456                  * slows down operations on the VM map, so let's not split the
2457                  * allocation into 4GB chunks if the max protection is NONE.  That
2458                  * memory should never be accessible, so it will never get to the
2459                  * default pager.
2460                  */
2461                 tmp_start = tmp2_start;
2462                 if (object == VM_OBJECT_NULL &&
2463                     size > (vm_map_size_t)ANON_CHUNK_SIZE &&
2464                     max_protection != VM_PROT_NONE &&
2465                     superpage_size == 0)
2466                         tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE;
2467                 else
2468                         tmp_end = tmp2_end;
2469                 do {
2470                         new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
2471                                                         object, offset, needs_copy,
2472                                                         FALSE, FALSE,
2473                                                         cur_protection, max_protection,
2474                                                         VM_BEHAVIOR_DEFAULT,
2475                                                         (entry_for_jit)? VM_INHERIT_NONE: inheritance,
2476                                                         0, no_cache,
2477                                                         permanent,
2478                                                         superpage_size,
2479                                                         clear_map_aligned,
2480                                                         is_submap);
2481
2482                         assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
2483                         VME_ALIAS_SET(new_entry, alias);
2484
2485                         if (entry_for_jit){
2486                                 if (!(map->jit_entry_exists)){
2487                                         new_entry->used_for_jit = TRUE;
2488                                         map->jit_entry_exists = TRUE;
2489                                 }
2490                         }
2491
2492                         if (resilient_codesign &&
2493                             ! ((cur_protection | max_protection) &
2494                                (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2495                                 new_entry->vme_resilient_codesign = TRUE;
2496                         }
2497
2498                         if (resilient_media &&
2499                             ! ((cur_protection | max_protection) &
2500                                (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2501                                 new_entry->vme_resilient_media = TRUE;
2502                         }
2503
2504                         assert(!new_entry->iokit_acct);
2505                         if (!is_submap &&
2506                             object != VM_OBJECT_NULL &&
2507                             object->purgable != VM_PURGABLE_DENY) {
2508                                 assert(new_entry->use_pmap);
2509                                 assert(!new_entry->iokit_acct);
2510                                 /*
2511                                  * Turn off pmap accounting since
2512                                  * purgeable objects have their
2513                                  * own ledgers.
2514                                  */
2515                                 new_entry->use_pmap = FALSE;
2516                         } else if (!is_submap &&
2517                                    iokit_acct) {
2518                                 /* alternate accounting */
2519                                 assert(!new_entry->iokit_acct);
2520                                 assert(new_entry->use_pmap);
2521                                 new_entry->iokit_acct = TRUE;
2522                                 new_entry->use_pmap = FALSE;
2523                                 vm_map_iokit_mapped_region(
2524                                         map,
2525                                         (new_entry->vme_end -
2526                                          new_entry->vme_start));
2527                         } else if (!is_submap) {
2528                                 assert(!new_entry->iokit_acct);
2529                                 assert(new_entry->use_pmap);
2530                         }
2531
2532                         if (is_submap) {
2533                                 vm_map_t        submap;
2534                                 boolean_t       submap_is_64bit;
2535                                 boolean_t       use_pmap;
2536
2537                                 assert(new_entry->is_sub_map);
2538                                 assert(!new_entry->use_pmap);
2539                                 assert(!new_entry->iokit_acct);
2540                                 submap = (vm_map_t) object;
2541                                 submap_is_64bit = vm_map_is_64bit(submap);
2542                                 use_pmap = (user_alias == VM_MEMORY_SHARED_PMAP);
2543 #ifndef NO_NESTED_PMAP
2544                                 if (use_pmap && submap->pmap == NULL) {
2545                                         ledger_t ledger = map->pmap->ledger;
2546                                         /* we need a sub pmap to nest... */
2547                                         submap->pmap = pmap_create(ledger, 0,
2548                                             submap_is_64bit);
2549                                         if (submap->pmap == NULL) {
2550                                                 /* let's proceed without nesting... */
2551                                         }
2552                                 }
2553                                 if (use_pmap && submap->pmap != NULL) {
2554                                         kr = pmap_nest(map->pmap,
2555                                                        submap->pmap,
2556                                                        tmp_start,
2557                                                        tmp_start,
2558                                                        tmp_end - tmp_start);
2559                                         if (kr != KERN_SUCCESS) {
2560                                                 printf("vm_map_enter: "
2561                                                        "pmap_nest(0x%llx,0x%llx) "
2562                                                        "error 0x%x\n",
2563                                                        (long long)tmp_start,
2564                                                        (long long)tmp_end,
2565                                                        kr);
2566                                         } else {
2567                                                 /* we're now nested ! */
2568                                                 new_entry->use_pmap = TRUE;
2569                                                 pmap_empty = FALSE;
2570                                         }
2571                                 }
2572 #endif /* NO_NESTED_PMAP */
2573                         }
2574                         entry = new_entry;
2575
2576                         if (superpage_size) {
2577                                 vm_page_t pages, m;
2578                                 vm_object_t sp_object;
2579
2580                                 VME_OFFSET_SET(entry, 0);
2581
2582                                 /* allocate one superpage */
2583                                 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
2584                                 if (kr != KERN_SUCCESS) {
2585                                         /* deallocate whole range... */
2586                                         new_mapping_established = TRUE;
2587                                         /* ... but only up to "tmp_end" */
2588                                         size -= end - tmp_end;
2589                                         RETURN(kr);
2590                                 }
2591
2592                                 /* create one vm_object per superpage */
2593                                 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
2594                                 sp_object->phys_contiguous = TRUE;
2595                                 sp_object->vo_shadow_offset = (vm_object_offset_t)pages->phys_page*PAGE_SIZE;
2596                                 VME_OBJECT_SET(entry, sp_object);
2597                                 assert(entry->use_pmap);
2598
2599                                 /* enter the base pages into the object */
2600                                 vm_object_lock(sp_object);
2601                                 for (offset = 0; offset < SUPERPAGE_SIZE; offset += PAGE_SIZE) {
2602                                         m = pages;
2603                                         pmap_zero_page(m->phys_page);
2604                                         pages = NEXT_PAGE(m);
2605                                         *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2606                                         vm_page_insert_wired(m, sp_object, offset, VM_KERN_MEMORY_OSFMK);
2607                                 }
2608                                 vm_object_unlock(sp_object);
2609                         }
2610                 } while (tmp_end != tmp2_end &&
2611                          (tmp_start = tmp_end) &&
2612                          (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ?
2613                           tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end));
2614         }
2615
2616         new_mapping_established = TRUE;
2617
2618 BailOut:
2619         assert(map_locked == TRUE);
2620
2621         if (result == KERN_SUCCESS) {
2622                 vm_prot_t pager_prot;
2623                 memory_object_t pager;
2624
2625 #if DEBUG
2626                 if (pmap_empty &&
2627                     !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
2628                         assert(vm_map_pmap_is_empty(map,
2629                                                     *address,
2630                                                     *address+size));
2631                 }
2632 #endif /* DEBUG */
2633
2634                 /*
2635                  * For "named" VM objects, let the pager know that the
2636                  * memory object is being mapped.  Some pagers need to keep
2637                  * track of this, to know when they can reclaim the memory
2638                  * object, for example.
2639                  * VM calls memory_object_map() for each mapping (specifying
2640                  * the protection of each mapping) and calls
2641                  * memory_object_last_unmap() when all the mappings are gone.
2642                  */
2643                 pager_prot = max_protection;
2644                 if (needs_copy) {
2645                         /*
2646                          * Copy-On-Write mapping: won't modify
2647                          * the memory object.
2648                          */
2649                         pager_prot &= ~VM_PROT_WRITE;
2650                 }
2651                 if (!is_submap &&
2652                     object != VM_OBJECT_NULL &&
2653                     object->named &&
2654                     object->pager != MEMORY_OBJECT_NULL) {
2655                         vm_object_lock(object);
2656                         pager = object->pager;
2657                         if (object->named &&
2658                             pager != MEMORY_OBJECT_NULL) {
2659                                 assert(object->pager_ready);
2660                                 vm_object_mapping_wait(object, THREAD_UNINT);
2661                                 vm_object_mapping_begin(object);
2662                                 vm_object_unlock(object);
2663
2664                                 kr = memory_object_map(pager, pager_prot);
2665                                 assert(kr == KERN_SUCCESS);
2666
2667                                 vm_object_lock(object);
2668                                 vm_object_mapping_end(object);
2669                         }
2670                         vm_object_unlock(object);
2671                 }
2672         }
2673
2674         assert(map_locked == TRUE);
2675
2676         if (!keep_map_locked) {
2677                 vm_map_unlock(map);
2678                 map_locked = FALSE;
2679         }
2680
2681         /*
2682          * We can't hold the map lock if we enter this block.
2683          */
2684
2685         if (result == KERN_SUCCESS) {
2686
2687                 /*      Wire down the new entry if the user
2688                  *      requested all new map entries be wired.
2689                  */
2690                 if ((map->wiring_required)||(superpage_size)) {
2691                         assert(!keep_map_locked);
2692                         pmap_empty = FALSE; /* pmap won't be empty */
2693                         kr = vm_map_wire(map, start, end,
2694                                              new_entry->protection | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_MLOCK),
2695                                              TRUE);
2696                         result = kr;
2697                 }
2698
2699         }
2700
2701         if (result != KERN_SUCCESS) {
2702                 if (new_mapping_established) {
2703                         /*
2704                          * We have to get rid of the new mappings since we
2705                          * won't make them available to the user.
2706                          * Try and do that atomically, to minimize the risk
2707                          * that someone else creates new mappings in that range.
2708                          */
2709                         zap_new_map = vm_map_create(PMAP_NULL,
2710                                                     *address,
2711                                                     *address + size,
2712                                                     map->hdr.entries_pageable);
2713                         vm_map_set_page_shift(zap_new_map,
2714                                               VM_MAP_PAGE_SHIFT(map));
2715                         vm_map_disable_hole_optimization(zap_new_map);
2716
2717                         if (!map_locked) {
2718                                 vm_map_lock(map);
2719                                 map_locked = TRUE;
2720                         }
2721                         (void) vm_map_delete(map, *address, *address+size,
2722                                              (VM_MAP_REMOVE_SAVE_ENTRIES |
2723                                               VM_MAP_REMOVE_NO_MAP_ALIGN),
2724                                              zap_new_map);
2725                 }
2726                 if (zap_old_map != VM_MAP_NULL &&
2727                     zap_old_map->hdr.nentries != 0) {
2728                         vm_map_entry_t  entry1, entry2;
2729
2730                         /*
2731                          * The new mapping failed.  Attempt to restore
2732                          * the old mappings, saved in the "zap_old_map".
2733                          */
2734                         if (!map_locked) {
2735                                 vm_map_lock(map);
2736                                 map_locked = TRUE;
2737                         }
2738
2739                         /* first check if the coast is still clear */
2740                         start = vm_map_first_entry(zap_old_map)->vme_start;
2741                         end = vm_map_last_entry(zap_old_map)->vme_end;
2742                         if (vm_map_lookup_entry(map, start, &entry1) ||
2743                             vm_map_lookup_entry(map, end, &entry2) ||
2744                             entry1 != entry2) {
2745                                 /*
2746                                  * Part of that range has already been
2747                                  * re-mapped:  we can't restore the old
2748                                  * mappings...
2749                                  */
2750                                 vm_map_enter_restore_failures++;
2751                         } else {
2752                                 /*
2753                                  * Transfer the saved map entries from
2754                                  * "zap_old_map" to the original "map",
2755                                  * inserting them all after "entry1".
2756                                  */
2757                                 for (entry2 = vm_map_first_entry(zap_old_map);
2758                                      entry2 != vm_map_to_entry(zap_old_map);
2759                                      entry2 = vm_map_first_entry(zap_old_map)) {
2760                                         vm_map_size_t entry_size;
2761
2762                                         entry_size = (entry2->vme_end -
2763                                                       entry2->vme_start);
2764                                         vm_map_store_entry_unlink(zap_old_map,
2765                                                             entry2);
2766                                         zap_old_map->size -= entry_size;
2767                                         vm_map_store_entry_link(map, entry1, entry2);
2768                                         map->size += entry_size;
2769                                         entry1 = entry2;
2770                                 }
2771                                 if (map->wiring_required) {
2772                                         /*
2773                                          * XXX TODO: we should rewire the
2774                                          * old pages here...
2775                                          */
2776                                 }
2777                                 vm_map_enter_restore_successes++;
2778                         }
2779                 }
2780         }
2781
2782         /*
2783          * The caller is responsible for releasing the lock if it requested to
2784          * keep the map locked.
2785          */
2786         if (map_locked && !keep_map_locked) {
2787                 vm_map_unlock(map);
2788         }
2789
2790         /*
2791          * Get rid of the "zap_maps" and all the map entries that
2792          * they may still contain.
2793          */
2794         if (zap_old_map != VM_MAP_NULL) {
2795                 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2796                 zap_old_map = VM_MAP_NULL;
2797         }
2798         if (zap_new_map != VM_MAP_NULL) {
2799                 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2800                 zap_new_map = VM_MAP_NULL;
2801         }
2802
2803         return result;
2804
2805 #undef  RETURN
2806 }
2807
2808
2809 /*
2810  * Counters for the prefault optimization.
      *
      * Both are global (non-static) signed 64-bit counters that start at zero.
      * NOTE(review): presumably updated by the prefault path of
      * vm_map_enter_mem_object_helper(), which is selected when that routine
      * is called with a non-zero "page_list_count" -- confirm where they are
      * actually incremented.
2811  */
2812 int64_t vm_prefault_nb_pages = 0;   /* presumably: pages prefaulted -- TODO confirm */
2813 int64_t vm_prefault_nb_bailout = 0; /* presumably: prefault attempts abandoned -- TODO confirm */
2814
2815 static kern_return_t
2816 vm_map_enter_mem_object_helper(
2817         vm_map_t                target_map,
2818         vm_map_offset_t         *address,
2819         vm_map_size_t           initial_size,
2820         vm_map_offset_t         mask,
2821         int                     flags,
2822         ipc_port_t              port,
2823         vm_object_offset_t      offset,
2824         boolean_t               copy,
2825         vm_prot_t               cur_protection,
2826         vm_prot_t               max_protection,
2827         vm_inherit_t            inheritance,
2828         upl_page_list_ptr_t     page_list,
2829         unsigned int            page_list_count)
2830 {
2831         vm_map_address_t        map_addr;
2832         vm_map_size_t           map_size;
2833         vm_object_t             object;
2834         vm_object_size_t        size;
2835         kern_return_t           result;
2836         boolean_t               mask_cur_protection, mask_max_protection;
2837         boolean_t               try_prefault = (page_list_count != 0);
2838         vm_map_offset_t         offset_in_mapping = 0;
2839
2840         mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
2841         mask_max_protection = max_protection & VM_PROT_IS_MASK;
2842         cur_protection &= ~VM_PROT_IS_MASK;
2843         max_protection &= ~VM_PROT_IS_MASK;
2844
2845         /*
2846          * Check arguments for validity
2847          */
2848         if ((target_map == VM_MAP_NULL) ||
2849             (cur_protection & ~VM_PROT_ALL) ||
2850             (max_protection & ~VM_PROT_ALL) ||
2851             (inheritance > VM_INHERIT_LAST_VALID) ||
2852             (try_prefault && (copy || !page_list)) ||
2853             initial_size == 0) {
2854                 return KERN_INVALID_ARGUMENT;
2855         }
2856
2857         {
2858                 map_addr = vm_map_trunc_page(*address,
2859                                              VM_MAP_PAGE_MASK(target_map));
2860                 map_size = vm_map_round_page(initial_size,
2861                                              VM_MAP_PAGE_MASK(target_map));
2862         }
2863         size = vm_object_round_page(initial_size);
2864
2865         /*
2866          * Find the vm object (if any) corresponding to this port.
2867          */
2868         if (!IP_VALID(port)) {
2869                 object = VM_OBJECT_NULL;
2870                 offset = 0;
2871                 copy = FALSE;
2872         } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
2873                 vm_named_entry_t        named_entry;
2874
2875                 named_entry = (vm_named_entry_t) port->ip_kobject;
2876
2877                 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
2878                              VM_FLAGS_RETURN_4K_DATA_ADDR)) {
2879                         offset += named_entry->data_offset;
2880                 }
2881
2882                 /* a few checks to make sure user is obeying rules */
2883                 if (size == 0) {
2884                         if (offset >= named_entry->size)
2885                                 return KERN_INVALID_RIGHT;
2886                         size = named_entry->size - offset;
2887                 }
2888                 if (mask_max_protection) {
2889                         max_protection &= named_entry->protection;
2890                 }
2891                 if (mask_cur_protection) {
2892                         cur_protection &= named_entry->protection;
2893                 }
2894                 if ((named_entry->protection & max_protection) !=
2895                     max_protection)
2896                         return KERN_INVALID_RIGHT;
2897                 if ((named_entry->protection & cur_protection) !=
2898                     cur_protection)
2899                         return KERN_INVALID_RIGHT;
2900                 if (offset + size < offset) {
2901                         /* overflow */
2902                         return KERN_INVALID_ARGUMENT;
2903                 }
2904                 if (named_entry->size < (offset + initial_size)) {
2905                         return KERN_INVALID_ARGUMENT;
2906                 }
2907
2908                 if (named_entry->is_copy) {
2909                         /* for a vm_map_copy, we can only map it whole */
2910                         if ((size != named_entry->size) &&
2911                             (vm_map_round_page(size,
2912                                                VM_MAP_PAGE_MASK(target_map)) ==
2913                              named_entry->size)) {
2914                                 /* XXX FBDP use the rounded size... */
2915                                 size = vm_map_round_page(
2916                                         size,
2917                                         VM_MAP_PAGE_MASK(target_map));
2918                         }
2919
2920                         if (!(flags & VM_FLAGS_ANYWHERE) &&
2921                             (offset != 0 ||
2922                              size != named_entry->size)) {
2923                                 /*
2924                                  * XXX for a mapping at a "fixed" address,
2925                                  * we can't trim after mapping the whole
2926                                  * memory entry, so reject a request for a
2927                                  * partial mapping.
2928                                  */
2929                                 return KERN_INVALID_ARGUMENT;
2930                         }
2931                 }
2932
2933                 /* the callers parameter offset is defined to be the */
2934                 /* offset from beginning of named entry offset in object */
2935                 offset = offset + named_entry->offset;
2936
2937                 if (! VM_MAP_PAGE_ALIGNED(size,
2938                                           VM_MAP_PAGE_MASK(target_map))) {
2939                         /*
2940                          * Let's not map more than requested;
2941                          * vm_map_enter() will handle this "not map-aligned"
2942                          * case.
2943                          */
2944                         map_size = size;
2945                 }
2946
2947                 named_entry_lock(named_entry);
2948                 if (named_entry->is_sub_map) {
2949                         vm_map_t                submap;
2950
2951                         if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
2952                                      VM_FLAGS_RETURN_4K_DATA_ADDR)) {
2953                                 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
2954                         }
2955
2956                         submap = named_entry->backing.map;
2957                         vm_map_lock(submap);
2958                         vm_map_reference(submap);
2959                         vm_map_unlock(submap);
2960                         named_entry_unlock(named_entry);
2961
2962                         result = vm_map_enter(target_map,
2963                                               &map_addr,
2964                                               map_size,
2965                                               mask,
2966                                               flags | VM_FLAGS_SUBMAP,
2967                                               (vm_object_t) submap,
2968                                               offset,
2969                                               copy,
2970                                               cur_protection,
2971                                               max_protection,
2972                                               inheritance);
2973                         if (result != KERN_SUCCESS) {
2974                                 vm_map_deallocate(submap);
2975                         } else {
2976                                 /*
2977                                  * No need to lock "submap" just to check its
2978                                  * "mapped" flag: that flag is never reset
2979                                  * once it's been set and if we race, we'll
2980                                  * just end up setting it twice, which is OK.
2981                                  */
2982                                 if (submap->mapped_in_other_pmaps == FALSE &&
2983                                     vm_map_pmap(submap) != PMAP_NULL &&
2984                                     vm_map_pmap(submap) !=
2985                                     vm_map_pmap(target_map)) {
2986                                         /*
2987                                          * This submap is being mapped in a map
2988                                          * that uses a different pmap.
2989                                          * Set its "mapped_in_other_pmaps" flag
2990                                          * to indicate that we now need to
2991                                          * remove mappings from all pmaps rather
2992                                          * than just the submap's pmap.
2993                                          */
2994                                         vm_map_lock(submap);
2995                                         submap->mapped_in_other_pmaps = TRUE;
2996                                         vm_map_unlock(submap);
2997                                 }
2998                                 *address = map_addr;
2999                         }
3000                         return result;
3001
3002                 } else if (named_entry->is_pager) {
3003                         unsigned int    access;
3004                         vm_prot_t       protections;
3005                         unsigned int    wimg_mode;
3006
3007                         protections = named_entry->protection & VM_PROT_ALL;
3008                         access = GET_MAP_MEM(named_entry->protection);
3009
3010                         if (flags & (VM_FLAGS_RETURN_DATA_ADDR|
3011                                      VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3012                                 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
3013                         }
3014
3015                         object = vm_object_enter(named_entry->backing.pager,
3016                                                  named_entry->size,
3017                                                  named_entry->internal,
3018                                                  FALSE,
3019                                                  FALSE);
3020                         if (object == VM_OBJECT_NULL) {
3021                                 named_entry_unlock(named_entry);
3022                                 return KERN_INVALID_OBJECT;
3023                         }
3024
3025                         /* JMM - drop reference on pager here */
3026
3027                         /* create an extra ref for the named entry */
3028                         vm_object_lock(object);
3029                         vm_object_reference_locked(object);
3030                         named_entry->backing.object = object;
3031                         named_entry->is_pager = FALSE;
3032                         named_entry_unlock(named_entry);
3033
3034                         wimg_mode = object->wimg_bits;
3035
3036                         if (access == MAP_MEM_IO) {
3037                                 wimg_mode = VM_WIMG_IO;
3038                         } else if (access == MAP_MEM_COPYBACK) {
3039                                 wimg_mode = VM_WIMG_USE_DEFAULT;
3040                         } else if (access == MAP_MEM_INNERWBACK) {
3041                                 wimg_mode = VM_WIMG_INNERWBACK;
3042                         } else if (access == MAP_MEM_WTHRU) {
3043                                 wimg_mode = VM_WIMG_WTHRU;
3044                         } else if (access == MAP_MEM_WCOMB) {
3045                                 wimg_mode = VM_WIMG_WCOMB;
3046                         }
3047
3048                         /* wait for object (if any) to be ready */
3049                         if (!named_entry->internal) {
3050                                 while (!object->pager_ready) {
3051                                         vm_object_wait(
3052                                                 object,
3053                                                 VM_OBJECT_EVENT_PAGER_READY,
3054                                                 THREAD_UNINT);
3055                                         vm_object_lock(object);
3056                                 }
3057                         }
3058
3059                         if (object->wimg_bits != wimg_mode)
3060                                 vm_object_change_wimg_mode(object, wimg_mode);
3061
3062 #if VM_OBJECT_TRACKING_OP_TRUESHARE
3063                         if (!object->true_share &&
3064                             vm_object_tracking_inited) {
3065                                 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
3066                                 int num = 0;
3067
3068                                 num = OSBacktrace(bt,
3069                                                   VM_OBJECT_TRACKING_BTDEPTH);
3070                                 btlog_add_entry(vm_object_tracking_btlog,
3071                                                 object,
3072                                                 VM_OBJECT_TRACKING_OP_TRUESHARE,
3073                                                 bt,
3074                                                 num);
3075                         }
3076 #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
3077
3078                         object->true_share = TRUE;
3079
3080                         if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
3081                                 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
3082                         vm_object_unlock(object);
3083
3084                 } else if (named_entry->is_copy) {
3085                         kern_return_t   kr;
3086                         vm_map_copy_t   copy_map;
3087                         vm_map_entry_t  copy_entry;
3088                         vm_map_offset_t copy_addr;
3089
3090                         if (flags & ~(VM_FLAGS_FIXED |
3091                                       VM_FLAGS_ANYWHERE |
3092                                       VM_FLAGS_OVERWRITE |
3093                                       VM_FLAGS_RETURN_4K_DATA_ADDR |
3094                                       VM_FLAGS_RETURN_DATA_ADDR)) {
3095                                 named_entry_unlock(named_entry);
3096                                 return KERN_INVALID_ARGUMENT;
3097                         }
3098
3099                         if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3100                                      VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3101                                 offset_in_mapping = offset - vm_object_trunc_page(offset);
3102                                 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR)
3103                                         offset_in_mapping &= ~((signed)(0xFFF));
3104                                 offset = vm_object_trunc_page(offset);
3105                                 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
3106                         }
3107
3108                         copy_map = named_entry->backing.copy;
3109                         assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
3110                         if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
3111                                 /* unsupported type; should not happen */
3112                                 printf("vm_map_enter_mem_object: "
3113                                        "memory_entry->backing.copy "
3114                                        "unsupported type 0x%x\n",
3115                                        copy_map->type);
3116                                 named_entry_unlock(named_entry);
3117                                 return KERN_INVALID_ARGUMENT;
3118                         }
3119
3120                         /* reserve a contiguous range */
3121                         kr = vm_map_enter(target_map,
3122                                           &map_addr,
3123                                           /* map whole mem entry, trim later: */
3124                                           named_entry->size,
3125                                           mask,
3126                                           flags & (VM_FLAGS_ANYWHERE |
3127                                                    VM_FLAGS_OVERWRITE |
3128                                                    VM_FLAGS_RETURN_4K_DATA_ADDR |
3129                                                    VM_FLAGS_RETURN_DATA_ADDR),
3130                                           VM_OBJECT_NULL,
3131                                           0,
3132                                           FALSE, /* copy */
3133                                           cur_protection,
3134                                           max_protection,
3135                                           inheritance);
3136                         if (kr != KERN_SUCCESS) {
3137                                 named_entry_unlock(named_entry);
3138                                 return kr;
3139                         }
3140
3141                         copy_addr = map_addr;
3142
3143                         for (copy_entry = vm_map_copy_first_entry(copy_map);
3144                              copy_entry != vm_map_copy_to_entry(copy_map);
3145                              copy_entry = copy_entry->vme_next) {
3146                                 int                     remap_flags = 0;
3147                                 vm_map_t                copy_submap;
3148                                 vm_object_t             copy_object;
3149                                 vm_map_size_t           copy_size;
3150                                 vm_object_offset_t      copy_offset;
3151
3152                                 copy_offset = VME_OFFSET(copy_entry);
3153                                 copy_size = (copy_entry->vme_end -
3154                                              copy_entry->vme_start);
3155
3156                                 /* sanity check */
3157                                 if ((copy_addr + copy_size) >
3158                                     (map_addr +
3159                                      named_entry->size /* XXX full size */ )) {
3160                                         /* over-mapping too much !? */
3161                                         kr = KERN_INVALID_ARGUMENT;
3162                                         /* abort */
3163                                         break;
3164                                 }
3165
3166                                 /* take a reference on the object */
3167                                 if (copy_entry->is_sub_map) {
3168                                         remap_flags |= VM_FLAGS_SUBMAP;
3169                                         copy_submap = VME_SUBMAP(copy_entry);
3170                                         vm_map_lock(copy_submap);
3171                                         vm_map_reference(copy_submap);
3172                                         vm_map_unlock(copy_submap);
3173                                         copy_object = (vm_object_t) copy_submap;
3174                                 } else {
3175                                         copy_object = VME_OBJECT(copy_entry);
3176                                         vm_object_reference(copy_object);
3177                                 }
3178
3179                                 /* over-map the object into destination */
3180                                 remap_flags |= flags;
3181                                 remap_flags |= VM_FLAGS_FIXED;
3182                                 remap_flags |= VM_FLAGS_OVERWRITE;
3183                                 remap_flags &= ~VM_FLAGS_ANYWHERE;
3184                                 kr = vm_map_enter(target_map,
3185                                                   &copy_addr,
3186                                                   copy_size,
3187                                                   (vm_map_offset_t) 0,
3188                                                   remap_flags,
3189                                                   copy_object,
3190                                                   copy_offset,
3191                                                   copy,
3192                                                   cur_protection,
3193                                                   max_protection,
3194                                                   inheritance);
3195                                 if (kr != KERN_SUCCESS) {
3196                                         if (copy_entry->is_sub_map) {
3197                                                 vm_map_deallocate(copy_submap);
3198                                         } else {
3199                                                 vm_object_deallocate(copy_object);
3200                                         }
3201                                         /* abort */
3202                                         break;
3203                                 }
3204
3205                                 /* next mapping */
3206                                 copy_addr += copy_size;
3207                         }
3208
3209                         if (kr == KERN_SUCCESS) {
3210                                 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3211                                              VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3212                                         *address = map_addr + offset_in_mapping;
3213                                 } else {
3214                                         *address = map_addr;
3215                                 }
3216
3217                                 if (offset) {
3218                                         /*
3219                                          * Trim in front, from 0 to "offset".
3220                                          */
3221                                         vm_map_remove(target_map,
3222                                                       map_addr,
3223                                                       map_addr + offset,
3224                                                       0);
3225                                         *address += offset;
3226                                 }
3227                                 if (offset + map_size < named_entry->size) {
3228                                         /*
3229                                          * Trim in back, from
3230                                          * "offset + map_size" to
3231                                          * "named_entry->size".
3232                                          */
3233                                         vm_map_remove(target_map,
3234                                                       (map_addr +
3235                                                        offset + map_size),
3236                                                       (map_addr +
3237                                                        named_entry->size),
3238                                                       0);
3239                                 }
3240                         }
3241                         named_entry_unlock(named_entry);
3242
3243                         if (kr != KERN_SUCCESS) {
3244                                 if (! (flags & VM_FLAGS_OVERWRITE)) {
3245                                         /* deallocate the contiguous range */
3246                                         (void) vm_deallocate(target_map,
3247                                                              map_addr,
3248                                                              map_size);
3249                                 }
3250                         }
3251
3252                         return kr;
3253
3254                 } else {
3255                         /* This is the case where we are going to map */
3256                         /* an already mapped object.  If the object is */
3257                         /* not ready it is internal.  An external     */
3258                         /* object cannot be mapped until it is ready  */
3259                         /* we can therefore avoid the ready check     */
3260                         /* in this case.  */
3261                         if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3262                                      VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3263                                 offset_in_mapping = offset - vm_object_trunc_page(offset);
3264                                 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR)
3265                                         offset_in_mapping &= ~((signed)(0xFFF));
3266                                 offset = vm_object_trunc_page(offset);
3267                                 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
3268                         }
3269
3270                         object = named_entry->backing.object;
3271                         assert(object != VM_OBJECT_NULL);
3272                         named_entry_unlock(named_entry);
3273                         vm_object_reference(object);
3274                 }
3275         } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
3276                 /*
3277                  * JMM - This is temporary until we unify named entries
3278                  * and raw memory objects.
3279                  *
3280                  * Detected fake ip_kotype for a memory object.  In
3281                  * this case, the port isn't really a port at all, but
3282                  * instead is just a raw memory object.
3283                  */
3284                 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3285                              VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3286                         panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
3287                 }
3288
3289                 object = vm_object_enter((memory_object_t)port,
3290                                          size, FALSE, FALSE, FALSE);
3291                 if (object == VM_OBJECT_NULL)
3292                         return KERN_INVALID_OBJECT;
3293
3294                 /* wait for object (if any) to be ready */
3295                 if (object != VM_OBJECT_NULL) {
3296                         if (object == kernel_object) {
3297                                 printf("Warning: Attempt to map kernel object"
3298                                         " by a non-private kernel entity\n");
3299                                 return KERN_INVALID_OBJECT;
3300                         }
3301                         if (!object->pager_ready) {
3302                                 vm_object_lock(object);
3303
3304                                 while (!object->pager_ready) {
3305                                         vm_object_wait(object,
3306                                                        VM_OBJECT_EVENT_PAGER_READY,
3307                                                        THREAD_UNINT);
3308                                         vm_object_lock(object);
3309                                 }
3310                                 vm_object_unlock(object);
3311                         }
3312                 }
3313         } else {
3314                 return KERN_INVALID_OBJECT;
3315         }
3316
3317         if (object != VM_OBJECT_NULL &&
3318             object->named &&
3319             object->pager != MEMORY_OBJECT_NULL &&
3320             object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
3321                 memory_object_t pager;
3322                 vm_prot_t       pager_prot;
3323                 kern_return_t   kr;
3324
3325                 /*
3326                  * For "named" VM objects, let the pager know that the
3327                  * memory object is being mapped.  Some pagers need to keep
3328                  * track of this, to know when they can reclaim the memory
3329                  * object, for example.
3330                  * VM calls memory_object_map() for each mapping (specifying
3331                  * the protection of each mapping) and calls
3332                  * memory_object_last_unmap() when all the mappings are gone.
3333                  */
3334                 pager_prot = max_protection;
3335                 if (copy) {
3336                         /*
3337                          * Copy-On-Write mapping: won't modify the
3338                          * memory object.
3339                          */
3340                         pager_prot &= ~VM_PROT_WRITE;
3341                 }
3342                 vm_object_lock(object);
3343                 pager = object->pager;
3344                 if (object->named &&
3345                     pager != MEMORY_OBJECT_NULL &&
3346                     object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
3347                         assert(object->pager_ready);
3348                         vm_object_mapping_wait(object, THREAD_UNINT);
3349                         vm_object_mapping_begin(object);
3350                         vm_object_unlock(object);
3351
3352                         kr = memory_object_map(pager, pager_prot);
3353                         assert(kr == KERN_SUCCESS);
3354
3355                         vm_object_lock(object);
3356                         vm_object_mapping_end(object);
3357                 }
3358                 vm_object_unlock(object);
3359         }
3360
3361         /*
3362          *      Perform the copy if requested
3363          */
3364
3365         if (copy) {
3366                 vm_object_t             new_object;
3367                 vm_object_offset_t      new_offset;
3368
3369                 result = vm_object_copy_strategically(object, offset,
3370                                                       map_size,
3371                                                       &new_object, &new_offset,
3372                                                       &copy);
3373
3374
3375                 if (result == KERN_MEMORY_RESTART_COPY) {
3376                         boolean_t success;
3377                         boolean_t src_needs_copy;
3378
3379                         /*
3380                          * XXX
3381                          * We currently ignore src_needs_copy.
3382                          * This really is the issue of how to make
3383                          * MEMORY_OBJECT_COPY_SYMMETRIC safe for
3384                          * non-kernel users to use. Solution forthcoming.
3385                          * In the meantime, since we don't allow non-kernel
3386                          * memory managers to specify symmetric copy,
3387                          * we won't run into problems here.
3388                          */
3389                         new_object = object;
3390                         new_offset = offset;
3391                         success = vm_object_copy_quickly(&new_object,
3392                                                          new_offset,
3393                                                          map_size,
3394                                                          &src_needs_copy,
3395                                                          &copy);
3396                         assert(success);
3397                         result = KERN_SUCCESS;
3398                 }
3399                 /*
3400                  *      Throw away the reference to the
3401                  *      original object, as it won't be mapped.
3402                  */
3403
3404                 vm_object_deallocate(object);
3405
3406                 if (result != KERN_SUCCESS) {
3407                         return result;
3408                 }
3409
3410                 object = new_object;
3411                 offset = new_offset;
3412         }
3413
3414         /*
3415          * If users want to try to prefault pages, the mapping and prefault
3416          * needs to be atomic.
3417          */
3418         if (try_prefault)
3419                 flags |= VM_FLAGS_KEEP_MAP_LOCKED;
3420
3421         {
3422                 result = vm_map_enter(target_map,
3423                                       &map_addr, map_size,
3424                                       (vm_map_offset_t)mask,
3425                                       flags,
3426                                       object, offset,
3427                                       copy,
3428                                       cur_protection, max_protection,
3429                                       inheritance);
3430         }
3431         if (result != KERN_SUCCESS)
3432                 vm_object_deallocate(object);
3433
3434         /*
3435          * Try to prefault, and do not forget to release the vm map lock.
3436          */
3437         if (result == KERN_SUCCESS && try_prefault) {
3438                 mach_vm_address_t va = map_addr;
3439                 kern_return_t kr = KERN_SUCCESS;
3440                 unsigned int i = 0;
3441
3442                 for (i = 0; i < page_list_count; ++i) {
3443                         if (UPL_VALID_PAGE(page_list, i)) {
3444                                 /*
3445                                  * If this function call failed, we should stop
3446                                  * trying to optimize, other calls are likely
3447                                  * going to fail too.
3448                                  *
3449                                  * We are not gonna report an error for such
3450                                  * failure though. That's an optimization, not
3451                                  * something critical.
3452                                  */
3453                                 kr = pmap_enter_options(target_map->pmap,
3454                                                         va, UPL_PHYS_PAGE(page_list, i),
3455                                                         cur_protection, VM_PROT_NONE,
3456                                                         0, TRUE, PMAP_OPTIONS_NOWAIT, NULL);
3457                                 if (kr != KERN_SUCCESS) {
3458                                         OSIncrementAtomic64(&vm_prefault_nb_bailout);
3459                                         break;
3460                                 }
3461                                 OSIncrementAtomic64(&vm_prefault_nb_pages);
3462                         }
3463
3464                         /* Next virtual address */
3465                         va += PAGE_SIZE;
3466                 }
3467                 vm_map_unlock(target_map);
3468         }
3469
3470         if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3471                      VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3472                 *address = map_addr + offset_in_mapping;
3473         } else {
3474                 *address = map_addr;
3475         }
3476         return result;
3477 }
3478
3479 kern_return_t
3480 vm_map_enter_mem_object(
3481         vm_map_t                target_map,
3482         vm_map_offset_t         *address,
3483         vm_map_size_t           initial_size,
3484         vm_map_offset_t         mask,
3485         int                     flags,
3486         ipc_port_t              port,
3487         vm_object_offset_t      offset,
3488         boolean_t               copy,
3489         vm_prot_t               cur_protection,
3490         vm_prot_t               max_protection,
3491         vm_inherit_t            inheritance)
3492 {
3493         return vm_map_enter_mem_object_helper(target_map, address, initial_size, mask, flags,
3494                                               port, offset, copy, cur_protection, max_protection,
3495                                               inheritance, NULL, 0);
3496 }
3497
3498 kern_return_t
3499 vm_map_enter_mem_object_prefault(
3500         vm_map_t                target_map,
3501         vm_map_offset_t         *address,
3502         vm_map_size_t           initial_size,
3503         vm_map_offset_t         mask,
3504         int                     flags,
3505         ipc_port_t              port,
3506         vm_object_offset_t      offset,
3507         vm_prot_t               cur_protection,
3508         vm_prot_t               max_protection,
3509         upl_page_list_ptr_t     page_list,
3510         unsigned int            page_list_count)
3511 {
3512         return vm_map_enter_mem_object_helper(target_map, address, initial_size, mask, flags,
3513                                               port, offset, FALSE, cur_protection, max_protection,
3514                                               VM_INHERIT_DEFAULT, page_list, page_list_count);
3515 }
3516
3517
3518 kern_return_t
3519 vm_map_enter_mem_object_control(
3520         vm_map_t                target_map,
3521         vm_map_offset_t         *address,
3522         vm_map_size_t           initial_size,
3523         vm_map_offset_t         mask,
3524         int                     flags,
3525         memory_object_control_t control,
3526         vm_object_offset_t      offset,
3527         boolean_t               copy,
3528         vm_prot_t               cur_protection,
3529         vm_prot_t               max_protection,
3530         vm_inherit_t            inheritance)
3531 {
3532         vm_map_address_t        map_addr;
3533         vm_map_size_t           map_size;
3534         vm_object_t             object;
3535         vm_object_size_t        size;
3536         kern_return_t           result;
3537         memory_object_t         pager;
3538         vm_prot_t               pager_prot;
3539         kern_return_t           kr;
3540
3541         /*
3542          * Check arguments for validity
3543          */
3544         if ((target_map == VM_MAP_NULL) ||
3545             (cur_protection & ~VM_PROT_ALL) ||
3546             (max_protection & ~VM_PROT_ALL) ||
3547             (inheritance > VM_INHERIT_LAST_VALID) ||
3548             initial_size == 0) {
3549                 return KERN_INVALID_ARGUMENT;
3550         }
3551
3552         {
3553                 map_addr = vm_map_trunc_page(*address,
3554                                              VM_MAP_PAGE_MASK(target_map));
3555                 map_size = vm_map_round_page(initial_size,
3556                                              VM_MAP_PAGE_MASK(target_map));
3557         }
3558         size = vm_object_round_page(initial_size);
3559
3560         object = memory_object_control_to_vm_object(control);
3561
3562         if (object == VM_OBJECT_NULL)
3563                 return KERN_INVALID_OBJECT;
3564
3565         if (object == kernel_object) {
3566                 printf("Warning: Attempt to map kernel object"
3567                        " by a non-private kernel entity\n");
3568                 return KERN_INVALID_OBJECT;
3569         }
3570
3571         vm_object_lock(object);
3572         object->ref_count++;
3573         vm_object_res_reference(object);
3574
3575         /*
3576          * For "named" VM objects, let the pager know that the
3577          * memory object is being mapped.  Some pagers need to keep
3578          * track of this, to know when they can reclaim the memory
3579          * object, for example.
3580          * VM calls memory_object_map() for each mapping (specifying
3581          * the protection of each mapping) and calls
3582          * memory_object_last_unmap() when all the mappings are gone.
3583          */
3584         pager_prot = max_protection;
3585         if (copy) {
3586                 pager_prot &= ~VM_PROT_WRITE;
3587         }
3588         pager = object->pager;
3589         if (object->named &&
3590             pager != MEMORY_OBJECT_NULL &&
3591             object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
3592                 assert(object->pager_ready);
3593                 vm_object_mapping_wait(object, THREAD_UNINT);
3594                 vm_object_mapping_begin(object);
3595                 vm_object_unlock(object);
3596
3597                 kr = memory_object_map(pager, pager_prot);
3598                 assert(kr == KERN_SUCCESS);
3599
3600                 vm_object_lock(object);
3601                 vm_object_mapping_end(object);
3602         }
3603         vm_object_unlock(object);
3604
3605         /*
3606          *      Perform the copy if requested
3607          */
3608
3609         if (copy) {
3610                 vm_object_t             new_object;
3611                 vm_object_offset_t      new_offset;
3612
3613                 result = vm_object_copy_strategically(object, offset, size,
3614                                                       &new_object, &new_offset,
3615                                                       &copy);
3616
3617
3618                 if (result == KERN_MEMORY_RESTART_COPY) {
3619                         boolean_t success;
3620                         boolean_t src_needs_copy;
3621
3622                         /*
3623                          * XXX
3624                          * We currently ignore src_needs_copy.
3625                          * This really is the issue of how to make
3626                          * MEMORY_OBJECT_COPY_SYMMETRIC safe for
3627                          * non-kernel users to use. Solution forthcoming.
3628                          * In the meantime, since we don't allow non-kernel
3629                          * memory managers to specify symmetric copy,
3630                          * we won't run into problems here.
3631                          */
3632                         new_object = object;
3633                         new_offset = offset;
3634                         success = vm_object_copy_quickly(&new_object,
3635                                                          new_offset, size,
3636                                                          &src_needs_copy,
3637                                                          &copy);
3638                         assert(success);
3639                         result = KERN_SUCCESS;
3640                 }
3641                 /*
3642                  *      Throw away the reference to the
3643                  *      original object, as it won't be mapped.
3644                  */
3645
3646                 vm_object_deallocate(object);
3647
3648                 if (result != KERN_SUCCESS) {
3649                         return result;
3650                 }
3651
3652                 object = new_object;
3653                 offset = new_offset;
3654         }
3655
3656         {
3657                 result = vm_map_enter(target_map,
3658                                       &map_addr, map_size,
3659                                       (vm_map_offset_t)mask,
3660                                       flags,
3661                                       object, offset,
3662                                       copy,
3663                                       cur_protection, max_protection,
3664                                       inheritance);
3665         }
3666         if (result != KERN_SUCCESS)
3667                 vm_object_deallocate(object);
3668         *address = map_addr;
3669
3670         return result;
3671 }
3672
3673
3674 #if     VM_CPM
3675
3676 #ifdef MACH_ASSERT
3677 extern pmap_paddr_t     avail_start, avail_end;
3678 #endif
3679
3680 /*
3681  *      Allocate memory in the specified map, with the caveat that
3682  *      the memory is physically contiguous.  This call may fail
3683  *      if the system can't find sufficient contiguous memory.
3684  *      This call may cause or lead to heart-stopping amounts of
3685  *      paging activity.
3686  *
3687  *      Memory obtained from this call should be freed in the
3688  *      normal way, viz., via vm_deallocate.
3689  */
3690 kern_return_t
3691 vm_map_enter_cpm(
3692         vm_map_t                map,
3693         vm_map_offset_t *addr,
3694         vm_map_size_t           size,
3695         int                     flags)
3696 {
3697         vm_object_t             cpm_obj;
3698         pmap_t                  pmap;
3699         vm_page_t               m, pages;
3700         kern_return_t           kr;
3701         vm_map_offset_t         va, start, end, offset;
3702 #if     MACH_ASSERT
3703         vm_map_offset_t         prev_addr = 0;
3704 #endif  /* MACH_ASSERT */
3705
3706         boolean_t               anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
3707         vm_tag_t tag;
3708
3709         VM_GET_FLAGS_ALIAS(flags, tag);
3710
3711         if (size == 0) {
3712                 *addr = 0;
3713                 return KERN_SUCCESS;
3714         }
3715         if (anywhere)
3716                 *addr = vm_map_min(map);
3717         else
3718                 *addr = vm_map_trunc_page(*addr,
3719                                           VM_MAP_PAGE_MASK(map));
3720         size = vm_map_round_page(size,
3721                                  VM_MAP_PAGE_MASK(map));
3722
3723         /*
3724          * LP64todo - cpm_allocate should probably allow
3725          * allocations of >4GB, but not with the current
3726          * algorithm, so just cast down the size for now.
3727          */
3728         if (size > VM_MAX_ADDRESS)
3729                 return KERN_RESOURCE_SHORTAGE;
3730         if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
3731                                &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)
3732                 return kr;
3733
3734         cpm_obj = vm_object_allocate((vm_object_size_t)size);
3735         assert(cpm_obj != VM_OBJECT_NULL);
3736         assert(cpm_obj->internal);
3737         assert(cpm_obj->vo_size == (vm_object_size_t)size);
3738         assert(cpm_obj->can_persist == FALSE);
3739         assert(cpm_obj->pager_created == FALSE);
3740         assert(cpm_obj->pageout == FALSE);
3741         assert(cpm_obj->shadow == VM_OBJECT_NULL);
3742
3743         /*
3744          *      Insert pages into object.
3745          */
3746
3747         vm_object_lock(cpm_obj);
3748         for (offset = 0; offset < size; offset += PAGE_SIZE) {
3749                 m = pages;
3750                 pages = NEXT_PAGE(m);
3751                 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
3752
3753                 assert(!m->gobbled);
3754                 assert(!m->wanted);
3755                 assert(!m->pageout);
3756                 assert(!m->tabled);
3757                 assert(VM_PAGE_WIRED(m));
3758                 /*
3759                  * ENCRYPTED SWAP:
3760                  * "m" is not supposed to be pageable, so it
3761                  * should not be encrypted.  It wouldn't be safe
3762                  * to enter it in a new VM object while encrypted.
3763                  */
3764                 ASSERT_PAGE_DECRYPTED(m);
3765                 assert(m->busy);
3766                 assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT));
3767
3768                 m->busy = FALSE;
3769                 vm_page_insert(m, cpm_obj, offset);
3770         }
3771         assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
3772         vm_object_unlock(cpm_obj);
3773
3774         /*
3775          *      Hang onto a reference on the object in case a
3776          *      multi-threaded application for some reason decides
3777          *      to deallocate the portion of the address space into
3778          *      which we will insert this object.
3779          *
3780          *      Unfortunately, we must insert the object now before
3781          *      we can talk to the pmap module about which addresses
3782          *      must be wired down.  Hence, the race with a multi-
3783          *      threaded app.
3784          */
3785         vm_object_reference(cpm_obj);
3786
3787         /*
3788          *      Insert object into map.
3789          */
3790
3791         kr = vm_map_enter(
3792                 map,
3793                 addr,
3794                 size,
3795                 (vm_map_offset_t)0,
3796                 flags,
3797                 cpm_obj,
3798                 (vm_object_offset_t)0,
3799                 FALSE,
3800                 VM_PROT_ALL,
3801                 VM_PROT_ALL,
3802                 VM_INHERIT_DEFAULT);
3803
3804         if (kr != KERN_SUCCESS) {
3805                 /*
3806                  *      A CPM object doesn't have can_persist set,
3807                  *      so all we have to do is deallocate it to
3808                  *      free up these pages.
3809                  */
3810                 assert(cpm_obj->pager_created == FALSE);
3811                 assert(cpm_obj->can_persist == FALSE);
3812                 assert(cpm_obj->pageout == FALSE);
3813                 assert(cpm_obj->shadow == VM_OBJECT_NULL);
3814                 vm_object_deallocate(cpm_obj); /* kill acquired ref */
3815                 vm_object_deallocate(cpm_obj); /* kill creation ref */
3816         }
3817
3818         /*
3819          *      Inform the physical mapping system that the
3820          *      range of addresses may not fault, so that
3821          *      page tables and such can be locked down as well.
3822          */
3823         start = *addr;
3824         end = start + size;
3825         pmap = vm_map_pmap(map);
3826         pmap_pageable(pmap, start, end, FALSE);
3827
3828         /*
3829          *      Enter each page into the pmap, to avoid faults.
3830          *      Note that this loop could be coded more efficiently,
3831          *      if the need arose, rather than looking up each page
3832          *      again.
3833          */
3834         for (offset = 0, va = start; offset < size;
3835              va += PAGE_SIZE, offset += PAGE_SIZE) {
3836                 int type_of_fault;
3837
3838                 vm_object_lock(cpm_obj);
3839                 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
3840                 assert(m != VM_PAGE_NULL);
3841
3842                 vm_page_zero_fill(m);
3843
3844                 type_of_fault = DBG_ZERO_FILL_FAULT;
3845
3846                 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
3847                                VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, 0, NULL,
3848                                &type_of_fault);
3849
3850                 vm_object_unlock(cpm_obj);
3851         }
3852
3853 #if     MACH_ASSERT
3854         /*
3855          *      Verify ordering in address space.
3856          */
3857         for (offset = 0; offset < size; offset += PAGE_SIZE) {
3858                 vm_object_lock(cpm_obj);
3859                 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
3860                 vm_object_unlock(cpm_obj);
3861                 if (m == VM_PAGE_NULL)
3862                         panic("vm_allocate_cpm:  obj %p off 0x%llx no page",
3863                               cpm_obj, (uint64_t)offset);
3864                 assert(m->tabled);
3865                 assert(!m->busy);
3866                 assert(!m->wanted);
3867                 assert(!m->fictitious);
3868                 assert(!m->private);
3869                 assert(!m->absent);
3870                 assert(!m->error);
3871                 assert(!m->cleaning);
3872                 assert(!m->laundry);
3873                 assert(!m->precious);
3874                 assert(!m->clustered);
3875                 if (offset != 0) {
3876                         if (m->phys_page != prev_addr + 1) {
3877                                 printf("start 0x%llx end 0x%llx va 0x%llx\n",
3878                                        (uint64_t)start, (uint64_t)end, (uint64_t)va);
3879                                 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
3880                                 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
3881                                 panic("vm_allocate_cpm:  pages not contig!");
3882                         }
3883                 }
3884                 prev_addr = m->phys_page;
3885         }
3886 #endif  /* MACH_ASSERT */
3887
3888         vm_object_deallocate(cpm_obj); /* kill extra ref */
3889
3890         return kr;
3891 }
3892
3893
3894 #else   /* VM_CPM */
3895
3896 /*
3897  *      Interface is defined in all cases, but unless the kernel
3898  *      is built explicitly for this option, the interface does
3899  *      nothing.
3900  */
3901
3902 kern_return_t
3903 vm_map_enter_cpm(
3904         __unused vm_map_t       map,
3905         __unused vm_map_offset_t        *addr,
3906         __unused vm_map_size_t  size,
3907         __unused int            flags)
3908 {
3909         return KERN_FAILURE;
3910 }
3911 #endif /* VM_CPM */
3912
3913 /* Not used without nested pmaps */
3914 #ifndef NO_NESTED_PMAP
3915 /*
3916  * Clip and unnest a portion of a nested submap mapping.
3917  */
3918
3919
3920 static void
3921 vm_map_clip_unnest(
3922         vm_map_t        map,
3923         vm_map_entry_t  entry,
3924         vm_map_offset_t start_unnest,
3925         vm_map_offset_t end_unnest)
3926 {
3927         vm_map_offset_t old_start_unnest = start_unnest;
3928         vm_map_offset_t old_end_unnest = end_unnest;
3929
3930         assert(entry->is_sub_map);
3931         assert(VME_SUBMAP(entry) != NULL);
3932         assert(entry->use_pmap);
3933
3934         /*
3935          * Query the platform for the optimal unnest range.
3936          * DRK: There's some duplication of effort here, since
3937          * callers may have adjusted the range to some extent. This
3938          * routine was introduced to support 1GiB subtree nesting
3939          * for x86 platforms, which can also nest on 2MiB boundaries
3940          * depending on size/alignment.
3941          */
3942         if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
3943                 log_unnest_badness(map, old_start_unnest, old_end_unnest);
3944         }
3945
3946         if (entry->vme_start > start_unnest ||
3947             entry->vme_end < end_unnest) {
3948                 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
3949                       "bad nested entry: start=0x%llx end=0x%llx\n",
3950                       (long long)start_unnest, (long long)end_unnest,
3951                       (long long)entry->vme_start, (long long)entry->vme_end);
3952         }
3953
3954         if (start_unnest > entry->vme_start) {
3955                 _vm_map_clip_start(&map->hdr,
3956                                    entry,
3957                                    start_unnest);
3958                 if (map->holelistenabled) {
3959                         vm_map_store_update_first_free(map, NULL, FALSE);
3960                 } else {
3961                         vm_map_store_update_first_free(map, map->first_free, FALSE);
3962                 }
3963         }
3964         if (entry->vme_end > end_unnest) {
3965                 _vm_map_clip_end(&map->hdr,
3966                                  entry,
3967                                  end_unnest);
3968                 if (map->holelistenabled) {
3969                         vm_map_store_update_first_free(map, NULL, FALSE);
3970                 } else {
3971                         vm_map_store_update_first_free(map, map->first_free, FALSE);
3972                 }
3973         }
3974
3975         pmap_unnest(map->pmap,
3976                     entry->vme_start,
3977                     entry->vme_end - entry->vme_start);
3978         if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
3979                 /* clean up parent map/maps */
3980                 vm_map_submap_pmap_clean(
3981                         map, entry->vme_start,
3982                         entry->vme_end,
3983                         VME_SUBMAP(entry),
3984                         VME_OFFSET(entry));
3985         }
3986         entry->use_pmap = FALSE;
3987         if ((map->pmap != kernel_pmap) &&
3988             (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
3989                 VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
3990         }
3991 }
3992 #endif  /* NO_NESTED_PMAP */
3993
3994 /*
3995  *      vm_map_clip_start:      [ internal use only ]
3996  *
3997  *      Asserts that the given entry begins at or after
3998  *      the specified address; if necessary,
3999  *      it splits the entry into two.
4000  */
4001 void
4002 vm_map_clip_start(
4003         vm_map_t        map,
4004         vm_map_entry_t  entry,
4005         vm_map_offset_t startaddr)
4006 {
4007 #ifndef NO_NESTED_PMAP
4008         if (entry->is_sub_map &&
4009             entry->use_pmap &&
4010             startaddr >= entry->vme_start) {
4011                 vm_map_offset_t start_unnest, end_unnest;
4012
4013                 /*
4014                  * Make sure "startaddr" is no longer in a nested range
4015                  * before we clip.  Unnest only the minimum range the platform
4016                  * can handle.
4017                  * vm_map_clip_unnest may perform additional adjustments to
4018                  * the unnest range.
4019                  */
4020                 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
4021                 end_unnest = start_unnest + pmap_nesting_size_min;
4022                 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
4023         }
4024 #endif /* NO_NESTED_PMAP */
4025         if (startaddr > entry->vme_start) {
4026                 if (VME_OBJECT(entry) &&
4027                     !entry->is_sub_map &&
4028                     VME_OBJECT(entry)->phys_contiguous) {
4029                         pmap_remove(map->pmap,
4030                                     (addr64_t)(entry->vme_start),
4031                                     (addr64_t)(entry->vme_end));
4032                 }
4033                 _vm_map_clip_start(&map->hdr, entry, startaddr);
4034                 if (map->holelistenabled) {
4035                         vm_map_store_update_first_free(map, NULL, FALSE);
4036                 } else {
4037                         vm_map_store_update_first_free(map, map->first_free, FALSE);
4038                 }
4039         }
4040 }
4041
4042
/*
 *	Clip an entry of a map copy so that it starts at "startaddr".
 *	Does nothing unless "startaddr" lies beyond the entry's start.
 *	Note: "entry" and "startaddr" are each expanded twice.
 */
#define vm_map_copy_clip_start(copy, entry, startaddr)			\
	MACRO_BEGIN							\
	if ((entry)->vme_start < (startaddr)) {				\
		_vm_map_clip_start(&(copy)->cpy_hdr,			\
				   (entry), (startaddr));		\
	}								\
	MACRO_END
4048
4049 /*
4050  *      This routine is called only when it is known that
4051  *      the entry must be split.
4052  */
4053 static void
4054 _vm_map_clip_start(
4055         register struct vm_map_header   *map_header,
4056         register vm_map_entry_t         entry,
4057         register vm_map_offset_t        start)
4058 {
4059         register vm_map_entry_t new_entry;
4060
4061         /*
4062          *      Split off the front portion --
4063          *      note that we must insert the new
4064          *      entry BEFORE this one, so that
4065          *      this entry has the specified starting
4066          *      address.
4067          */
4068
4069         if (entry->map_aligned) {
4070                 assert(VM_MAP_PAGE_ALIGNED(start,
4071                                            VM_MAP_HDR_PAGE_MASK(map_header)));
4072         }
4073
4074         new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
4075         vm_map_entry_copy_full(new_entry, entry);
4076
4077         new_entry->vme_end = start;
4078         assert(new_entry->vme_start < new_entry->vme_end);
4079         VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
4080         assert(start < entry->vme_end);
4081         entry->vme_start = start;
4082
4083         _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
4084
4085         if (entry->is_sub_map)
4086                 vm_map_reference(VME_SUBMAP(new_entry));
4087         else
4088                 vm_object_reference(VME_OBJECT(new_entry));
4089 }
4090
4091
4092 /*
4093  *      vm_map_clip_end:        [ internal use only ]
4094  *
4095  *      Asserts that the given entry ends at or before
4096  *      the specified address; if necessary,
4097  *      it splits the entry into two.
4098  */
4099 void
4100 vm_map_clip_end(
4101         vm_map_t        map,
4102         vm_map_entry_t  entry,
4103         vm_map_offset_t endaddr)
4104 {
4105         if (endaddr > entry->vme_end) {
4106                 /*
4107                  * Within the scope of this clipping, limit "endaddr" to
4108                  * the end of this map entry...
4109                  */
4110                 endaddr = entry->vme_end;
4111         }
4112 #ifndef NO_NESTED_PMAP
4113         if (entry->is_sub_map && entry->use_pmap) {
4114                 vm_map_offset_t start_unnest, end_unnest;
4115
4116                 /*
4117                  * Make sure the range between the start of this entry and
4118                  * the new "endaddr" is no longer nested before we clip.
4119                  * Unnest only the minimum range the platform can handle.
4120                  * vm_map_clip_unnest may perform additional adjustments to
4121                  * the unnest range.
4122                  */
4123                 start_unnest = entry->vme_start;
4124                 end_unnest =
4125                         (endaddr + pmap_nesting_size_min - 1) &
4126                         ~(pmap_nesting_size_min - 1);
4127                 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
4128         }
4129 #endif /* NO_NESTED_PMAP */
4130         if (endaddr < entry->vme_end) {
4131                 if (VME_OBJECT(entry) &&
4132                     !entry->is_sub_map &&
4133                     VME_OBJECT(entry)->phys_contiguous) {
4134                         pmap_remove(map->pmap,
4135                                     (addr64_t)(entry->vme_start),
4136                                     (addr64_t)(entry->vme_end));
4137                 }
4138                 _vm_map_clip_end(&map->hdr, entry, endaddr);
4139                 if (map->holelistenabled) {
4140                         vm_map_store_update_first_free(map, NULL, FALSE);
4141                 } else {
4142                         vm_map_store_update_first_free(map, map->first_free, FALSE);
4143                 }
4144         }
4145 }
4146
4147
/*
 *	Clip an entry of a map copy so that it ends at "endaddr".
 *	Does nothing unless "endaddr" lies before the entry's end.
 *	Note: "entry" and "endaddr" are each expanded twice.
 */
#define vm_map_copy_clip_end(copy, entry, endaddr)			\
	MACRO_BEGIN							\
	if ((entry)->vme_end > (endaddr)) {				\
		_vm_map_clip_end(&(copy)->cpy_hdr,			\
				 (entry), (endaddr));			\
	}								\
	MACRO_END
4153
4154 /*
4155  *      This routine is called only when it is known that
4156  *      the entry must be split.
4157  */
4158 static void
4159 _vm_map_clip_end(
4160         register struct vm_map_header   *map_header,
4161         register vm_map_entry_t         entry,
4162         register vm_map_offset_t        end)
4163 {
4164         register vm_map_entry_t new_entry;
4165
4166         /*
4167          *      Create a new entry and insert it
4168          *      AFTER the specified entry
4169          */
4170
4171         if (entry->map_aligned) {
4172                 assert(VM_MAP_PAGE_ALIGNED(end,
4173                                            VM_MAP_HDR_PAGE_MASK(map_header)));
4174         }
4175
4176         new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
4177         vm_map_entry_copy_full(new_entry, entry);
4178
4179         assert(entry->vme_start < end);
4180         new_entry->vme_start = entry->vme_end = end;
4181         VME_OFFSET_SET(new_entry,
4182                        VME_OFFSET(new_entry) + (end - entry->vme_start));
4183         assert(new_entry->vme_start < new_entry->vme_end);
4184
4185         _vm_map_store_entry_link(map_header, entry, new_entry);
4186
4187         if (entry->is_sub_map)
4188                 vm_map_reference(VME_SUBMAP(new_entry));
4189         else
4190                 vm_object_reference(VME_OBJECT(new_entry));
4191 }
4192
4193
/*
 *	VM_MAP_RANGE_CHECK:	[ internal use only ]
 *
 *	Adjusts "start" and "end" in place (both must be modifiable
 *	lvalues): "start" is raised to at least vm_map_min(map),
 *	"end" is lowered to at most vm_map_max(map), and if "start"
 *	is then still above "end" it is set equal to "end".
 */
#define	VM_MAP_RANGE_CHECK(map, start, end)	\
	MACRO_BEGIN				\
	if (start < vm_map_min(map)) {		\
		start = vm_map_min(map);	\
	}					\
	if (end > vm_map_max(map)) {		\
		end = vm_map_max(map);		\
	}					\
	if (start > end) {			\
		start = end;			\
	}					\
	MACRO_END
4209
4210 /*
4211  *      vm_map_range_check:     [ internal use only ]
4212  *
4213  *      Check that the region defined by the specified start and
4214  *      end addresses are wholly contained within a single map
4215  *      entry or set of adjacent map entries of the specified map,
4216  *      i.e. the specified region contains no unmapped space.
4217  *      If any or all of the region is unmapped, FALSE is returned.
4218  *      Otherwise, TRUE is returned and if the output argument 'entry'
4219  *      is not NULL it points to the map entry containing the start
4220  *      of the region.
4221  *
4222  *      The map is locked for reading on entry and is left locked.
      *
      *      Parameters:
      *              map     map to examine
      *              start   first address of the region
      *              end     end address of the region; an entry covers
      *                      it when end <= entry->vme_end
      *              entry   optional output (may be NULL), see above
      *
      *      Note that '*entry' is stored as soon as the lookup of
      *      'start' succeeds, so it is also written when FALSE is
      *      returned because a hole was found further into the region.
      *      It is left untouched when the sanity checks or the lookup
      *      of 'start' fail.
4223  */
4224 static boolean_t
4225 vm_map_range_check(
4226         register vm_map_t       map,
4227         register vm_map_offset_t        start,
4228         register vm_map_offset_t        end,
4229         vm_map_entry_t          *entry)
4230 {
4231         vm_map_entry_t          cur;
4232         register vm_map_offset_t        prev;
4233
4234         /*
4235          *      Basic sanity checks first: the region must lie inside
              *      [vm_map_min(map), vm_map_max(map)] and must not be
              *      inverted.  An empty region (start == end) is allowed.
4236          */
4237         if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
4238                 return (FALSE);
4239
4240         /*
4241          *      Check first if the region starts within a valid
4242          *      mapping for the map.
4243          */
4244         if (!vm_map_lookup_entry(map, start, &cur))
4245                 return (FALSE);
4246
4247         /*
4248          *      Optimize for the case that the region is contained
4249          *      in a single map entry.
4250          */
4251         if (entry != (vm_map_entry_t *) NULL)
4252                 *entry = cur;
4253         if (end <= cur->vme_end)
4254                 return (TRUE);
4255
4256         /*
4257          *      If the region is not wholly contained within a
4258          *      single entry, walk the entries looking for holes.
              *      "prev" is the end address of the entry just visited;
              *      the walk continues only while the next entry starts
              *      exactly there.  It stops with FALSE on the first gap
              *      or when it reaches vm_map_to_entry(map) before "end"
              *      has been covered.
4259          */
4260         prev = cur->vme_end;
4261         cur = cur->vme_next;
4262         while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
4263                 if (end <= cur->vme_end)
4264                         return (TRUE);
4265                 prev = cur->vme_end;
4266                 cur = cur->vme_next;
4267         }
4268         return (FALSE);
4269 }
4270
4271 /*
4272  *      vm_map_submap:          [ kernel use only ]
4273  *
4274  *      Mark the given range as handled by a subordinate map.
4275  *
4276  *      This range must have been created with vm_map_find using
4277  *      the vm_submap_object, and no other operations may have been
4278  *      performed on this range prior to calling vm_map_submap.
4279  *
4280  *      Only a limited number of operations can be performed
4281  *      within this range after calling vm_map_submap:
4282  *              vm_fault
4283  *      [Don't try vm_map_copyin!]
4284  *
4285  *      To remove a submapping, one must first remove the
4286  *      range from the superior map, and then destroy the
4287  *      submap (if desired).  [Better yet, don't try it.]
      *
      *      Parameters:
      *              map       parent map; locked and unlocked by this
      *                        routine
      *              start     first address of the range in "map"
      *              end       end address of the range in "map"
      *              submap    map that will back the range; a reference
      *                        is taken on it with vm_map_reference
      *              offset    stored as the entry's offset
      *              use_pmap  when nested pmaps are compiled in (that is,
      *                        NO_NESTED_PMAP is not defined) and this is
      *                        TRUE, the submap's pmap is nested into the
      *                        parent's pmap with pmap_nest; unused when
      *                        NO_NESTED_PMAP is defined
      *
      *      Returns:
      *              KERN_SUCCESS           the entry now refers to "submap"
      *              KERN_INVALID_ARGUMENT  there is no entry at or after
      *                                     "start", that entry is already
      *                                     a submap, or after clipping it
      *                                     does not span exactly
      *                                     [start, end) backed by an
      *                                     untouched vm_submap_object
      *              KERN_NO_SPACE          a pmap for the submap could not
      *                                     be created
      *
      *      Panics if pmap_nest reports a failure.
4288  */
4289 kern_return_t
4290 vm_map_submap(
4291         vm_map_t        map,
4292         vm_map_offset_t start,
4293         vm_map_offset_t end,
4294         vm_map_t        submap,
4295         vm_map_offset_t offset,
4296 #ifdef NO_NESTED_PMAP
4297         __unused
4298 #endif  /* NO_NESTED_PMAP */
4299         boolean_t       use_pmap)
4300 {
4301         vm_map_entry_t          entry;
             /* stays KERN_INVALID_ARGUMENT unless the conversion below happens */
4302         register kern_return_t  result = KERN_INVALID_ARGUMENT;
4303         register vm_object_t    object;
4304
4305         vm_map_lock(map);
4306
             /*
              * If "start" is not inside an entry, move on to the entry
              * that follows the one handed back by the lookup.
              */
4307         if (! vm_map_lookup_entry(map, start, &entry)) {
4308                 entry = entry->vme_next;
4309         }
4310
4311         if (entry == vm_map_to_entry(map) ||
4312             entry->is_sub_map) {
4313                 vm_map_unlock(map);
4314                 return KERN_INVALID_ARGUMENT;
4315         }
4316
             /*
              * Clip the entry to the requested range.  The clipping is
              * not undone if the checks below reject the entry.
              */
4317         vm_map_clip_start(map, entry, start);
4318         vm_map_clip_end(map, entry, end);
4319
             /*
              * The entry must now coincide exactly with [start, end) and
              * must still be backed by vm_submap_object with no resident
              * pages, no copy object, no shadow and no pager created.
              */
4320         if ((entry->vme_start == start) && (entry->vme_end == end) &&
4321             (!entry->is_sub_map) &&
4322             ((object = VME_OBJECT(entry)) == vm_submap_object) &&
4323             (object->resident_page_count == 0) &&
4324             (object->copy == VM_OBJECT_NULL) &&
4325             (object->shadow == VM_OBJECT_NULL) &&
4326             (!object->pager_created)) {
                     /*
                      * Swap the entry's object reference for a reference
                      * on the submap.
                      */
4327                 VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
4328                 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
4329                 vm_object_deallocate(object);
4330                 entry->is_sub_map = TRUE;
4331                 entry->use_pmap = FALSE;
4332                 VME_SUBMAP_SET(entry, submap);
4333                 vm_map_reference(submap);
4334                 if (submap->mapped_in_other_pmaps == FALSE &&
4335                     vm_map_pmap(submap) != PMAP_NULL &&
4336                     vm_map_pmap(submap) != vm_map_pmap(map)) {
4337                         /*
4338                          * This submap is being mapped in a map
4339                          * that uses a different pmap.
4340                          * Set its "mapped_in_other_pmaps" flag
4341                          * to indicate that we now need to
4342                          * remove mappings from all pmaps rather
4343                          * than just the submap's pmap.
4344                          */
4345                         submap->mapped_in_other_pmaps = TRUE;
4346                 }
4347
4348 #ifndef NO_NESTED_PMAP
4349                 if (use_pmap) {
4350                         /* nest if platform code will allow */
4351                         if(submap->pmap == NULL) {
                                     /*
                                      * The submap has no pmap yet: create
                                      * one charged to the parent pmap's
                                      * ledger.
                                      */
4352                                 ledger_t ledger = map->pmap->ledger;
4353                                 submap->pmap = pmap_create(ledger,
4354                                                 (vm_map_size_t) 0, FALSE);
4355                                 if(submap->pmap == PMAP_NULL) {
                                             /*
                                              * NOTE(review): the entry was
                                              * already converted to a submap
                                              * entry above and is not
                                              * restored on this error path.
                                              */
4356                                         vm_map_unlock(map);
4357                                         return(KERN_NO_SPACE);
4358                                 }
4359                         }
                             /*
                              * "start" is passed for both address arguments;
                              * presumably the range is nested at the same
                              * address in both pmaps -- confirm against
                              * pmap_nest.
                              */
4360                         result = pmap_nest(map->pmap,
4361                                            (VME_SUBMAP(entry))->pmap,
4362                                            (addr64_t)start,
4363                                            (addr64_t)start,
4364                                            (uint64_t)(end - start));
4365                         if(result)
4366                                 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
4367                         entry->use_pmap = TRUE;
4368                 }
4369 #else   /* NO_NESTED_PMAP */
                     /* no nesting: remove [start, end) from the parent's pmap */
4370                 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
4371 #endif  /* NO_NESTED_PMAP */
4372                 result = KERN_SUCCESS;
4373         }
4374         vm_map_unlock(map);
4375
4376         return(result);
4377 }
4378
4379
4380 /*
4381  *      vm_map_protect:
4382  *
4383  *      Sets the protection of the specified address
4384  *      region in the target map.  If "set_max" is
4385  *      specified, the maximum protection is to be set;
4386  *      otherwise, only the current protection is affected.
      *
      *      Parameters:
      *              map       map to modify; locked and unlocked by this
      *                        routine
      *              start     first address of the region
      *              end       end address of the region (entries whose
      *                        vme_start is below "end" are affected)
      *              new_prot  protection to install; may also carry
      *                        VM_PROT_COPY
      *              set_max   if TRUE, new_prot (without VM_PROT_COPY)
      *                        becomes the maximum protection and the
      *                        current protection is reduced to its
      *                        intersection with that new maximum
      *
      *      If new_prot includes VM_PROT_COPY, every entry in the
      *      region is marked needs_copy and gets VM_PROT_WRITE added to
      *      its maximum protection before new_prot is applied.
      *
      *      If the entry containing "start" has a superpage_size, the
      *      request is widened: "start" is rounded down and "end" is
      *      rounded up to superpage boundaries.
      *
      *      Returns:
      *              KERN_SUCCESS             protections were updated
      *              KERN_INVALID_ADDRESS     "start" is at or above
      *                                       map->max_offset, "start" is
      *                                       not inside an entry, or the
      *                                       region contains a hole
      *              KERN_PROTECTION_FAILURE  new_prot asks for access
      *                                       that an entry's maximum
      *                                       protection does not allow
      *
      *      All error returns happen before any entry is changed: the
      *      region is validated in a first pass and only modified in a
      *      second one.
4387  */
4388 kern_return_t
4389 vm_map_protect(
4390         register vm_map_t       map,
4391         register vm_map_offset_t        start,
4392         register vm_map_offset_t        end,
4393         register vm_prot_t      new_prot,
4394         register boolean_t      set_max)
4395 {
4396         register vm_map_entry_t         current;
4397         register vm_map_offset_t        prev;
4398         vm_map_entry_t                  entry;
4399         vm_prot_t                       new_max;
4400
4401         XPR(XPR_VM_MAP,
4402             "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
4403             map, start, end, new_prot, set_max);
4404
4405         vm_map_lock(map);
4406
4407         /* LP64todo - remove this check when vm_map_commpage64()
4408          * no longer has to stuff in a map_entry for the commpage
4409          * above the map's max_offset.
4410          */
4411         if (start >= map->max_offset) {
4412                 vm_map_unlock(map);
4413                 return(KERN_INVALID_ADDRESS);
4414         }
4415
4416         while(1) {
4417                 /*
4418                  *      Lookup the entry.  If it doesn't start in a valid
4419                  *      entry, return an error.
4420                  */
4421                 if (! vm_map_lookup_entry(map, start, &entry)) {
4422                         vm_map_unlock(map);
4423                         return(KERN_INVALID_ADDRESS);
4424                 }
4425
                     /*
                      * A start address that is not superpage-aligned inside
                      * a superpage entry is rounded down and the lookup is
                      * repeated with the new start.
                      */
4426                 if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
4427                         start = SUPERPAGE_ROUND_DOWN(start);
4428                         continue;
4429                 }
4430                 break;
4431         }
4432         if (entry->superpage_size)
4433                 end = SUPERPAGE_ROUND_UP(end);
4434
4435         /*
4436          *      Make a first pass to check for protection and address
4437          *      violations.
              *
              *      "prev" tracks the end of the entry visited last, so
              *      a mismatch with the next entry's start is a hole.
4438          */
4439
4440         current = entry;
4441         prev = current->vme_start;
4442         while ((current != vm_map_to_entry(map)) &&
4443                (current->vme_start < end)) {
4444
4445                 /*
4446                  * If there is a hole, return an error.
4447                  */
4448                 if (current->vme_start != prev) {
4449                         vm_map_unlock(map);
4450                         return(KERN_INVALID_ADDRESS);
4451                 }
4452
                     /*
                      * With VM_PROT_COPY the entry is checked as if its
                      * maximum protection already included VM_PROT_WRITE,
                      * and the VM_PROT_COPY bit itself is not held against
                      * the maximum.
                      */
4453                 new_max = current->max_protection;
4454                 if(new_prot & VM_PROT_COPY) {
4455                         new_max |= VM_PROT_WRITE;
4456                         if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
4457                                 vm_map_unlock(map);
4458                                 return(KERN_PROTECTION_FAILURE);
4459                         }
4460                 } else {
4461                         if ((new_prot & new_max) != new_prot) {
4462                                 vm_map_unlock(map);
4463                                 return(KERN_PROTECTION_FAILURE);
4464                         }
4465                 }
4466
4467
4468                 prev = current->vme_end;
4469                 current = current->vme_next;
4470         }
             /* the walk ended before "end" was covered: the tail is unmapped */
4471         if (end > prev) {
4472                 vm_map_unlock(map);
4473                 return(KERN_INVALID_ADDRESS);
4474         }
4475
4476         /*
4477          *      Go back and fix up protections.
4478          *      Clip to start here if the range starts within
4479          *      the entry.
4480          */
4481
4482         current = entry;
4483         if (current != vm_map_to_entry(map)) {
4484                 /* clip and unnest if necessary */
4485                 vm_map_clip_start(map, current, start);
4486         }
4487
4488         while ((current != vm_map_to_entry(map)) &&
4489                (current->vme_start < end)) {
4490
4491                 vm_prot_t       old_prot;
4492
4493                 vm_map_clip_end(map, current, end);
4494
4495                 if (current->is_sub_map) {
4496                         /* clipping did unnest if needed */
4497                         assert(!current->use_pmap);
4498                 }
4499
4500                 old_prot = current->protection;
4501
4502                 if(new_prot & VM_PROT_COPY) {
4503                         /* caller is asking specifically to copy the      */
4504                         /* mapped data, this implies that max protection  */
4505                         /* will include write.  Caller must be prepared   */
4506                         /* for loss of shared memory communication in the */
4507                         /* target area after taking this step */
4508
                             /*
                              * An entry that is not a submap and has no
                              * object yet gets a fresh object sized to the
                              * entry, at offset 0.
                              */
4509                         if (current->is_sub_map == FALSE &&
4510                             VME_OBJECT(current) == VM_OBJECT_NULL) {
4511                                 VME_OBJECT_SET(current,
4512                                                vm_object_allocate(
4513                                                        (vm_map_size_t)
4514                                                        (current->vme_end -
4515                                                         current->vme_start)));
4516                                 VME_OFFSET_SET(current, 0);
4517                                 assert(current->use_pmap);
4518                         }
4519                         assert(current->wired_count == 0);
4520                         current->needs_copy = TRUE;
4521                         current->max_protection |= VM_PROT_WRITE;
4522                 }
4523
                     /*
                      * set_max: new_prot (without VM_PROT_COPY) becomes the
                      * maximum and the current protection keeps only the
                      * bits it already had that the new maximum allows.
                      * Otherwise only the current protection is replaced.
                      */
4524                 if (set_max)
4525                         current->protection =
4526                                 (current->max_protection =
4527                                  new_prot & ~VM_PROT_COPY) &
4528                                 old_prot;
4529                 else
4530                         current->protection = new_prot & ~VM_PROT_COPY;
4531
4532                 /*
4533                  *      Update physical map if necessary.
4534                  *      If the request is to turn off write protection,
4535                  *      we won't do it for real (in pmap). This is because
4536                  *      it would cause copy-on-write to fail.  We've already
4537                  *      set, the new protection in the map, so if a
4538                  *      write-protect fault occurred, it will be fixed up
4539                  *      properly, COW or not.
4540                  */
4541                 if (current->protection != old_prot) {
4542                         /* Look one level in we support nested pmaps */
4543                         /* from mapped submaps which are direct entries */
4544                         /* in our map */
4545
4546                         vm_prot_t prot;
4547
                             /* the value given to pmap never includes write */
4548                         prot = current->protection & ~VM_PROT_WRITE;
4549
                             /*
                              * If override_nx() is true for this map and
                              * alias and some access remains, execute
                              * permission is added as well.
                              */
4550                         if (override_nx(map, VME_ALIAS(current)) && prot)
4551                                 prot |= VM_PROT_EXECUTE;
4552
4553
4554                         if (current->is_sub_map && current->use_pmap) {
4555                                 pmap_protect(VME_SUBMAP(current)->pmap,
4556                                              current->vme_start,
4557                                              current->vme_end,
4558                                              prot);
4559                         } else {
4560                                 pmap_protect(map->pmap,
4561                                              current->vme_start,
4562                                              current->vme_end,
4563                                              prot);
4564                         }
4565                 }
4566                 current = current->vme_next;
4567         }
4568
             /*
              * Final pass: offer every touched entry to
              * vm_map_simplify_entry.  The bound is "<=" rather than "<",
              * so the entry that begins exactly at "end" is visited too
              * (presumably so the pieces created by clipping can be merged
              * back with their neighbours -- confirm against
              * vm_map_simplify_entry).
              */
4569         current = entry;
4570         while ((current != vm_map_to_entry(map)) &&
4571                (current->vme_start <= end)) {
4572                 vm_map_simplify_entry(map, current);
4573                 current = current->vme_next;
4574         }
4575
4576         vm_map_unlock(map);
4577         return(KERN_SUCCESS);
4578 }
4579
4580 /*
4581  *      vm_map_inherit:
4582  *
4583  *      Sets the inheritance of the specified address
4584  *      range in the target map.  Inheritance
4585  *      affects how the map will be shared with
4586  *      child maps at the time of vm_map_fork.
      *
      *      Parameters:
      *              map              map to modify; locked and unlocked
      *                               by this routine
      *              start            first address of the range
      *              end              end address of the range (entries
      *                               whose vme_start is below "end" are
      *                               affected)
      *              new_inheritance  value stored in each entry's
      *                               "inheritance" field
      *
      *      The range is first clamped to the map's bounds with
      *      VM_MAP_RANGE_CHECK.  Unmapped gaps inside the range are not
      *      treated as an error; only the entries found are updated.
      *
      *      Returns:
      *              KERN_SUCCESS           inheritance was updated
      *              KERN_INVALID_ARGUMENT  new_inheritance is
      *                                     VM_INHERIT_COPY and the range
      *                                     contains a submap entry; no
      *                                     entry is modified in that case
4587  */
4588 kern_return_t
4589 vm_map_inherit(
4590         register vm_map_t       map,
4591         register vm_map_offset_t        start,
4592         register vm_map_offset_t        end,
4593         register vm_inherit_t   new_inheritance)
4594 {
4595         register vm_map_entry_t entry;
4596         vm_map_entry_t  temp_entry;
4597
4598         vm_map_lock(map);
4599
4600         VM_MAP_RANGE_CHECK(map, start, end);
4601
             /*
              * Find the first entry to work on: the one containing
              * "start", or else the entry following the one handed back
              * by the failed lookup.  temp_entry remembers it for the
              * second pass.
              */
4602         if (vm_map_lookup_entry(map, start, &temp_entry)) {
4603                 entry = temp_entry;
4604         }
4605         else {
4606                 temp_entry = temp_entry->vme_next;
4607                 entry = temp_entry;
4608         }
4609
4610         /* first check entire range for submaps which can't support the */
4611         /* given inheritance. */
4612         while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4613                 if(entry->is_sub_map) {
4614                         if(new_inheritance == VM_INHERIT_COPY) {
4615                                 vm_map_unlock(map);
4616                                 return(KERN_INVALID_ARGUMENT);
4617                         }
4618                 }
4619
4620                 entry = entry->vme_next;
4621         }
4622
             /* second pass: clip to the range and apply the new value */
4623         entry = temp_entry;
4624         if (entry != vm_map_to_entry(map)) {
4625                 /* clip and unnest if necessary */
4626                 vm_map_clip_start(map, entry, start);
4627         }
4628
4629         while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4630                 vm_map_clip_end(map, entry, end);
4631                 if (entry->is_sub_map) {
4632                         /* clip did unnest if needed */
4633                         assert(!entry->use_pmap);
4634                 }
4635
4636                 entry->inheritance = new_inheritance;
4637
4638                 entry = entry->vme_next;
4639         }
4640
4641         vm_map_unlock(map);
4642         return(KERN_SUCCESS);
4643 }
4644
4645 /*
4646  * Update the accounting for the amount of wired memory in this map.  If the user has
4647  * exceeded the defined limits, then we fail.  Wiring on behalf of the kernel never fails.
      *
      * Parameters:
      *      map        map whose user_wire_size total is updated
      *      entry      map entry being wired; its wired_count and user_wired_count are updated
      *      user_wire  TRUE when wiring at the request of the user, FALSE for the kernel
      *
      * Returns:
      *      KERN_SUCCESS            the counts were incremented
      *      KERN_RESOURCE_SHORTAGE  first user wiring of this entry would exceed a wire limit
      *      KERN_FAILURE            user wiring would push a count past MAX_WIRE_COUNT
      *
      * A kernel wiring that would push wired_count past MAX_WIRE_COUNT panics instead of
      * returning an error.
4648  */
4649
4650 static kern_return_t
4651 add_wire_counts(
4652         vm_map_t        map,
4653         vm_map_entry_t  entry,
4654         boolean_t       user_wire)
4655 {
             /* only assigned and used on the first user wiring of the entry */
4656         vm_map_size_t   size;
4657
4658         if (user_wire) {
                     /* page count compared (after ptoa_64) against the system-wide limits below */
4659                 unsigned int total_wire_count =  vm_page_wire_count + vm_lopage_free_count;
4660
4661                 /*
4662                  * We're wiring memory at the request of the user.  Check if this is the first time the user is wiring
4663                  * this map entry.
4664                  */
4665
4666                 if (entry->user_wired_count == 0) {
4667                         size = entry->vme_end - entry->vme_start;
4668
4669                         /*
4670                          * Since this is the first time the user is wiring this map entry, check to see if we're
4671                          * exceeding the user wire limits.  There is a per map limit which is the smaller of either
4672                          * the process's rlimit or the global vm_user_wire_limit which caps this value.  There is also
4673                          * a system-wide limit on the amount of memory all users can wire.  If the user is over either
4674                          * limit, then we fail.
4675                          */
4676
4677                         if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
4678                            size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
4679                            size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
4680                                 return KERN_RESOURCE_SHORTAGE;
4681
4682                         /*
4683                          * The first time the user wires an entry, we also increment the wired_count and add this to
4684                          * the total that has been wired in the map.
4685                          */
4686
4687                         if (entry->wired_count >= MAX_WIRE_COUNT)
4688                                 return KERN_FAILURE;
4689
4690                         entry->wired_count++;
4691                         map->user_wire_size += size;
4692                 }
4693
                     /* every user wiring, first or not, is counted in user_wired_count */
4694                 if (entry->user_wired_count >= MAX_WIRE_COUNT)
4695                         return KERN_FAILURE;
4696
4697                 entry->user_wired_count++;
4698
4699         } else {
4700
4701                 /*
4702                  * The kernel's wiring the memory.  Just bump the count and continue.
4703                  */
4704
4705                 if (entry->wired_count >= MAX_WIRE_COUNT)
4706                         panic("vm_map_wire: too many wirings");
4707
4708                 entry->wired_count++;
4709         }
4710
4711         return KERN_SUCCESS;
4712 }
4713
4714 /*
4715  * Update the memory wiring accounting now that the given map entry is being unwired.
      *
      * Parameters:
      *      map        map whose user_wire_size total is reduced when the last user wiring goes away
      *      entry      map entry being unwired; its wired_count and user_wired_count are updated
      *      user_wire  TRUE when unwiring at the request of the user, FALSE for the kernel
      *
      * Nothing is returned.  Counts that would drop below zero are caught only by assert().
4716  */
4717
4718 static void
4719 subtract_wire_counts(
4720         vm_map_t        map,
4721         vm_map_entry_t  entry,
4722         boolean_t       user_wire)
4723 {
4724
4725         if (user_wire) {
4726
4727                 /*
4728                  * We're unwiring memory at the request of the user.  See if we're removing the last user wire reference.
4729                  */
4730
4731                 if (entry->user_wired_count == 1) {
4732
4733                         /*
4734                          * We're removing the last user wire reference.  Decrement the wired_count and the total
4735                          * user wired memory for this map.
4736                          */
4737
4738                         assert(entry->wired_count >= 1);
4739                         entry->wired_count--;
4740                         map->user_wire_size -= entry->vme_end - entry->vme_start;
4741                 }
4742
                     /* the user count itself is dropped on every user unwire */
4743                 assert(entry->user_wired_count >= 1);
4744                 entry->user_wired_count--;
4745
4746         } else {
4747
4748                 /*
4749                  * The kernel is unwiring the memory.   Just update the count.
4750                  */
4751
4752                 assert(entry->wired_count >= 1);
4753                 entry->wired_count--;
4754         }
4755 }
4756
4757 /*
4758  *      vm_map_wire:
4759  *
4760  *      Sets the pageability of the specified address range in the
4761  *      target map as wired.  Regions specified as not pageable require
4762  *      locked-down physical memory and physical page maps.  The
4763  *      access_type variable indicates types of accesses that must not
4764  *      generate page faults.  This is checked against protection of
4765  *      memory being locked-down.
4766  *
4767  *      The map must not be locked, but a reference must remain to the
4768  *      map throughout the call.
4769  */
4770 static kern_return_t
4771 vm_map_wire_nested(
4772         register vm_map_t       map,
4773         register vm_map_offset_t        start,
4774         register vm_map_offset_t        end,
4775         register vm_prot_t      caller_prot,
4776         boolean_t               user_wire,
4777         pmap_t                  map_pmap,
4778         vm_map_offset_t         pmap_addr,
4779         ppnum_t                 *physpage_p)
4780 {
4781         register vm_map_entry_t entry;
4782         register vm_prot_t      access_type;
4783         struct vm_map_entry     *first_entry, tmp_entry;
4784         vm_map_t                real_map;
4785         register vm_map_offset_t        s,e;
4786         kern_return_t           rc;
4787         boolean_t               need_wakeup;
4788         boolean_t               main_map = FALSE;
4789         wait_interrupt_t        interruptible_state;
4790         thread_t                cur_thread;
4791         unsigned int            last_timestamp;
4792         vm_map_size_t           size;
4793         boolean_t               wire_and_extract;
4794
4795         access_type = (caller_prot & VM_PROT_ALL);
4796
4797         wire_and_extract = FALSE;
4798         if (physpage_p != NULL) {
4799                 /*
4800                  * The caller wants the physical page number of the
4801                  * wired page.  We return only one physical page number
4802                  * so this works for only one page at a time.
4803                  */
4804                 if ((end - start) != PAGE_SIZE) {
4805                         return KERN_INVALID_ARGUMENT;
4806                 }
4807                 wire_and_extract = TRUE;
4808                 *physpage_p = 0;
4809         }
4810
4811         vm_map_lock(map);
4812         if(map_pmap == NULL)
4813                 main_map = TRUE;
4814         last_timestamp = map->timestamp;
4815
4816         VM_MAP_RANGE_CHECK(map, start, end);
4817         assert(page_aligned(start));
4818         assert(page_aligned(end));
4819         assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
4820         assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
4821         if (start == end) {
4822                 /* We wired what the caller asked for, zero pages */
4823                 vm_map_unlock(map);
4824                 return KERN_SUCCESS;
4825         }
4826
4827         need_wakeup = FALSE;
4828         cur_thread = current_thread();
4829
4830         s = start;
4831         rc = KERN_SUCCESS;
4832
4833         if (vm_map_lookup_entry(map, s, &first_entry)) {
4834                 entry = first_entry;
4835                 /*
4836                  * vm_map_clip_start will be done later.
4837                  * We don't want to unnest any nested submaps here !
4838                  */
4839         } else {
4840                 /* Start address is not in map */
4841                 rc = KERN_INVALID_ADDRESS;
4842                 goto done;
4843         }
4844
4845         while ((entry != vm_map_to_entry(map)) && (s < end)) {
4846                 /*
4847                  * At this point, we have wired from "start" to "s".
4848                  * We still need to wire from "s" to "end".
4849                  *
4850                  * "entry" hasn't been clipped, so it could start before "s"
4851                  * and/or end after "end".
4852                  */
4853
4854                 /* "e" is how far we want to wire in this entry */
4855                 e = entry->vme_end;
4856                 if (e > end)
4857                         e = end;
4858
4859                 /*
4860                  * If another thread is wiring/unwiring this entry then
4861                  * block after informing other thread to wake us up.
4862                  */
4863                 if (entry->in_transition) {
4864                         wait_result_t wait_result;
4865
4866                         /*
4867                          * We have not clipped the entry.  Make sure that
4868                          * the start address is in range so that the lookup
4869                          * below will succeed.
4870                          * "s" is the current starting point: we've already
4871                          * wired from "start" to "s" and we still have
4872                          * to wire from "s" to "end".
4873                          */
4874
4875                         entry->needs_wakeup = TRUE;
4876
4877                         /*
4878                          * wake up anybody waiting on entries that we have
4879                          * already wired.
4880                          */
4881                         if (need_wakeup) {
4882                                 vm_map_entry_wakeup(map);
4883                                 need_wakeup = FALSE;
4884                         }
4885                         /*
4886                          * User wiring is interruptible
4887                          */
4888                         wait_result = vm_map_entry_wait(map,
4889                                                         (user_wire) ? THREAD_ABORTSAFE :
4890                                                         THREAD_UNINT);
4891                         if (user_wire && wait_result == THREAD_INTERRUPTED) {
4892                                 /*
4893                                  * undo the wirings we have done so far
4894                                  * We do not clear the needs_wakeup flag,
4895                                  * because we cannot tell if we were the
4896                                  * only one waiting.
4897                                  */
4898                                 rc = KERN_FAILURE;
4899                                 goto done;
4900                         }
4901
4902                         /*
4903                          * Cannot avoid a lookup here. reset timestamp.
4904                          */
4905                         last_timestamp = map->timestamp;
4906
4907                         /*
4908                          * The entry could have been clipped, look it up again.
4909                          * Worst that can happen is, it may not exist anymore.
4910                          */
4911                         if (!vm_map_lookup_entry(map, s, &first_entry)) {
4912                                 /*
4913                                  * User: undo everything upto the previous
4914                                  * entry.  let vm_map_unwire worry about
4915                                  * checking the validity of the range.
4916                                  */
4917                                 rc = KERN_FAILURE;
4918                                 goto done;
4919                         }
4920                         entry = first_entry;
4921                         continue;
4922                 }
4923
4924                 if (entry->is_sub_map) {
4925                         vm_map_offset_t sub_start;
4926                         vm_map_offset_t sub_end;
4927                         vm_map_offset_t local_start;
4928                         vm_map_offset_t local_end;
4929                         pmap_t          pmap;
4930
4931                         if (wire_and_extract) {
4932                                 /*
4933                                  * Wiring would result in copy-on-write
4934                                  * which would not be compatible with
4935                                  * the sharing we have with the original
4936                                  * provider of this memory.
4937                                  */
4938                                 rc = KERN_INVALID_ARGUMENT;
4939                                 goto done;
4940                         }
4941
4942                         vm_map_clip_start(map, entry, s);
4943                         vm_map_clip_end(map, entry, end);
4944
4945                         sub_start = VME_OFFSET(entry);
4946                         sub_end = entry->vme_end;
4947                         sub_end += VME_OFFSET(entry) - entry->vme_start;
4948
4949                         local_end = entry->vme_end;
4950                         if(map_pmap == NULL) {
4951                                 vm_object_t             object;
4952                                 vm_object_offset_t      offset;
4953                                 vm_prot_t               prot;
4954                                 boolean_t               wired;
4955                                 vm_map_entry_t          local_entry;
4956                                 vm_map_version_t         version;
4957                                 vm_map_t                lookup_map;
4958
4959                                 if(entry->use_pmap) {
4960                                         pmap = VME_SUBMAP(entry)->pmap;
4961                                         /* ppc implementation requires that */
4962                                         /* submaps pmap address ranges line */
4963                                         /* up with parent map */
4964 #ifdef notdef
4965                                         pmap_addr = sub_start;
4966 #endif
4967                                         pmap_addr = s;
4968                                 } else {
4969                                         pmap = map->pmap;
4970                                         pmap_addr = s;
4971                                 }
4972
4973                                 if (entry->wired_count) {
4974                                         if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4975                                                 goto done;
4976
4977                                         /*
4978                                          * The map was not unlocked:
4979                                          * no need to goto re-lookup.
4980                                          * Just go directly to next entry.
4981                                          */
4982                                         entry = entry->vme_next;
4983                                         s = entry->vme_start;
4984                                         continue;
4985
4986                                 }
4987
4988                                 /* call vm_map_lookup_locked to */
4989                                 /* cause any needs copy to be   */
4990                                 /* evaluated */
4991                                 local_start = entry->vme_start;
4992                                 lookup_map = map;
4993                                 vm_map_lock_write_to_read(map);
4994                                 if(vm_map_lookup_locked(
4995                                            &lookup_map, local_start,
4996                                            access_type,
4997                                            OBJECT_LOCK_EXCLUSIVE,
4998                                            &version, &object,
4999                                            &offset, &prot, &wired,
5000                                            NULL,
5001                                            &real_map)) {
5002
5003                                         vm_map_unlock_read(lookup_map);
5004                                         assert(map_pmap == NULL);
5005                                         vm_map_unwire(map, start,
5006                                                       s, user_wire);
5007                                         return(KERN_FAILURE);
5008                                 }
5009                                 vm_object_unlock(object);
5010                                 if(real_map != lookup_map)
5011                                         vm_map_unlock(real_map);
5012                                 vm_map_unlock_read(lookup_map);
5013                                 vm_map_lock(map);
5014
5015                                 /* we unlocked, so must re-lookup */
5016                                 if (!vm_map_lookup_entry(map,
5017                                                          local_start,
5018                                                          &local_entry)) {
5019                                         rc = KERN_FAILURE;
5020                                         goto done;
5021                                 }
5022
5023                                 /*
5024                                  * entry could have been "simplified",
5025                                  * so re-clip
5026                                  */
5027                                 entry = local_entry;
5028                                 assert(s == local_start);
5029                                 vm_map_clip_start(map, entry, s);
5030                                 vm_map_clip_end(map, entry, end);
5031                                 /* re-compute "e" */
5032                                 e = entry->vme_end;
5033                                 if (e > end)
5034                                         e = end;
5035
5036                                 /* did we have a change of type? */
5037                                 if (!entry->is_sub_map) {
5038                                         last_timestamp = map->timestamp;
5039                                         continue;
5040                                 }
5041                         } else {
5042                                 local_start = entry->vme_start;
5043                                 pmap = map_pmap;
5044                         }
5045
5046                         if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
5047                                 goto done;
5048
5049                         entry->in_transition = TRUE;
5050
5051                         vm_map_unlock(map);
5052                         rc = vm_map_wire_nested(VME_SUBMAP(entry),
5053                                                 sub_start, sub_end,
5054                                                 caller_prot,
5055                                                 user_wire, pmap, pmap_addr,
5056                                                 NULL);
5057                         vm_map_lock(map);
5058
5059                         /*
5060                          * Find the entry again.  It could have been clipped
5061                          * after we unlocked the map.
5062                          */
5063                         if (!vm_map_lookup_entry(map, local_start,
5064                                                  &first_entry))
5065                                 panic("vm_map_wire: re-lookup failed");
5066                         entry = first_entry;
5067
5068                         assert(local_start == s);
5069                         /* re-compute "e" */
5070                         e = entry->vme_end;
5071                         if (e > end)
5072                                 e = end;
5073
5074                         last_timestamp = map->timestamp;
5075                         while ((entry != vm_map_to_entry(map)) &&
5076                                (entry->vme_start < e)) {
5077                                 assert(entry->in_transition);
5078                                 entry->in_transition = FALSE;
5079                                 if (entry->needs_wakeup) {
5080                                         entry->needs_wakeup = FALSE;
5081                                         need_wakeup = TRUE;
5082                                 }
5083                                 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
5084                                         subtract_wire_counts(map, entry, user_wire);
5085                                 }
5086                                 entry = entry->vme_next;
5087                         }
5088                         if (rc != KERN_SUCCESS) {       /* from vm_*_wire */
5089                                 goto done;
5090                         }
5091
5092                         /* no need to relookup again */
5093                         s = entry->vme_start;
5094                         continue;
5095                 }
5096
5097                 /*
5098                  * If this entry is already wired then increment
5099                  * the appropriate wire reference count.
5100                  */
5101                 if (entry->wired_count) {
5102
5103                         if ((entry->protection & access_type) != access_type) {
5104                                 /* found a protection problem */
5105
5106                                 /*
5107                                  * XXX FBDP
5108                                  * We should always return an error
5109                                  * in this case but since we didn't
5110                                  * enforce it before, let's do
5111                                  * it only for the new "wire_and_extract"
5112                                  * code path for now...
5113                                  */
5114                                 if (wire_and_extract) {
5115                                         rc = KERN_PROTECTION_FAILURE;
5116                                         goto done;
5117                                 }
5118                         }
5119
5120                         /*
5121                          * entry is already wired down, get our reference
5122                          * after clipping to our range.
5123                          */
5124                         vm_map_clip_start(map, entry, s);
5125                         vm_map_clip_end(map, entry, end);
5126
5127                         if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
5128                                 goto done;
5129
5130                         if (wire_and_extract) {
5131                                 vm_object_t             object;
5132                                 vm_object_offset_t      offset;
5133                                 vm_page_t               m;
5134
5135                                 /*
5136                                  * We don't have to "wire" the page again
5137                                  * bit we still have to "extract" its
5138                                  * physical page number, after some sanity
5139                                  * checks.
5140                                  */
5141                                 assert((entry->vme_end - entry->vme_start)
5142                                        == PAGE_SIZE);
5143                                 assert(!entry->needs_copy);
5144                                 assert(!entry->is_sub_map);
5145                                 assert(VME_OBJECT(entry));
5146                                 if (((entry->vme_end - entry->vme_start)
5147                                      != PAGE_SIZE) ||
5148                                     entry->needs_copy ||
5149                                     entry->is_sub_map ||
5150                                     VME_OBJECT(entry) == VM_OBJECT_NULL) {
5151                                         rc = KERN_INVALID_ARGUMENT;
5152                                         goto done;
5153                                 }
5154
5155                                 object = VME_OBJECT(entry);
5156                                 offset = VME_OFFSET(entry);
5157                                 /* need exclusive lock to update m->dirty */
5158                                 if (entry->protection & VM_PROT_WRITE) {
5159                                         vm_object_lock(object);
5160                                 } else {
5161                                         vm_object_lock_shared(object);
5162                                 }
5163                                 m = vm_page_lookup(object, offset);
5164                                 assert(m != VM_PAGE_NULL);
5165                                 assert(m->wire_count);
5166                                 if (m != VM_PAGE_NULL && m->wire_count) {
5167                                         *physpage_p = m->phys_page;
5168                                         if (entry->protection & VM_PROT_WRITE) {
5169                                                 vm_object_lock_assert_exclusive(
5170                                                         m->object);
5171                                                 m->dirty = TRUE;
5172                                         }
5173                                 } else {
5174                                         /* not already wired !? */
5175                                         *physpage_p = 0;
5176                                 }
5177                                 vm_object_unlock(object);
5178                         }
5179
5180                         /* map was not unlocked: no need to relookup */
5181                         entry = entry->vme_next;
5182                         s = entry->vme_start;
5183                         continue;
5184                 }
5185
5186                 /*
5187                  * Unwired entry or wire request transmitted via submap
5188                  */
5189
5190
5191                 /*
5192                  * Perform actions of vm_map_lookup that need the write
5193                  * lock on the map: create a shadow object for a
5194                  * copy-on-write region, or an object for a zero-fill
5195                  * region.
5196                  */
5197                 size = entry->vme_end - entry->vme_start;
5198                 /*
5199                  * If wiring a copy-on-write page, we need to copy it now
5200                  * even if we're only (currently) requesting read access.
5201                  * This is aggressive, but once it's wired we can't move it.
5202                  */
5203                 if (entry->needs_copy) {
5204                         if (wire_and_extract) {
5205                                 /*
5206                                  * We're supposed to share with the original
5207                                  * provider so should not be "needs_copy"
5208                                  */
5209                                 rc = KERN_INVALID_ARGUMENT;
5210                                 goto done;
5211                         }
5212
5213                         VME_OBJECT_SHADOW(entry, size);
5214                         entry->needs_copy = FALSE;
5215                 } else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
5216                         if (wire_and_extract) {
5217                                 /*
5218                                  * We're supposed to share with the original
5219                                  * provider so should already have an object.
5220                                  */
5221                                 rc = KERN_INVALID_ARGUMENT;
5222                                 goto done;
5223                         }
5224                         VME_OBJECT_SET(entry, vm_object_allocate(size));
5225                         VME_OFFSET_SET(entry, (vm_object_offset_t)0);
5226                         assert(entry->use_pmap);
5227                 }
5228
5229                 vm_map_clip_start(map, entry, s);
5230                 vm_map_clip_end(map, entry, end);
5231
5232                 /* re-compute "e" */
5233                 e = entry->vme_end;
5234                 if (e > end)
5235                         e = end;
5236
5237                 /*
5238                  * Check for holes and protection mismatch.
5239                  * Holes: Next entry should be contiguous unless this
5240                  *        is the end of the region.
5241                  * Protection: Access requested must be allowed, unless
5242                  *      wiring is by protection class
5243                  */
5244                 if ((entry->vme_end < end) &&
5245                     ((entry->vme_next == vm_map_to_entry(map)) ||
5246                      (entry->vme_next->vme_start > entry->vme_end))) {
5247                         /* found a hole */
5248                         rc = KERN_INVALID_ADDRESS;
5249                         goto done;
5250                 }
5251                 if ((entry->protection & access_type) != access_type) {
5252                         /* found a protection problem */
5253                         rc = KERN_PROTECTION_FAILURE;
5254                         goto done;
5255                 }
5256
5257                 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
5258
5259                 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
5260                         goto done;
5261
5262                 entry->in_transition = TRUE;
5263
5264                 /*
5265                  * This entry might get split once we unlock the map.
5266                  * In vm_fault_wire(), we need the current range as
5267                  * defined by this entry.  In order for this to work
5268                  * along with a simultaneous clip operation, we make a
5269                  * temporary copy of this entry and use that for the
5270                  * wiring.  Note that the underlying objects do not
5271                  * change during a clip.
5272                  */
5273                 tmp_entry = *entry;
5274
5275                 /*
5276                  * The in_transition state guarentees that the entry
5277                  * (or entries for this range, if split occured) will be
5278                  * there when the map lock is acquired for the second time.
5279                  */
5280                 vm_map_unlock(map);
5281
5282                 if (!user_wire && cur_thread != THREAD_NULL)
5283                         interruptible_state = thread_interrupt_level(THREAD_UNINT);
5284                 else
5285                         interruptible_state = THREAD_UNINT;
5286
5287                 if(map_pmap)
5288                         rc = vm_fault_wire(map,
5289                                            &tmp_entry, caller_prot, map_pmap, pmap_addr,
5290                                            physpage_p);
5291                 else
5292                         rc = vm_fault_wire(map,
5293                                            &tmp_entry, caller_prot, map->pmap,
5294                                            tmp_entry.vme_start,
5295                                            physpage_p);
5296
5297                 if (!user_wire && cur_thread != THREAD_NULL)
5298                         thread_interrupt_level(interruptible_state);
5299
5300                 vm_map_lock(map);
5301
5302                 if (last_timestamp+1 != map->timestamp) {
5303                         /*
5304                          * Find the entry again.  It could have been clipped
5305                          * after we unlocked the map.
5306                          */
5307                         if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
5308                                                  &first_entry))
5309                                 panic("vm_map_wire: re-lookup failed");
5310
5311                         entry = first_entry;
5312                 }
5313
5314                 last_timestamp = map->timestamp;
5315
5316                 while ((entry != vm_map_to_entry(map)) &&
5317                        (entry->vme_start < tmp_entry.vme_end)) {
5318                         assert(entry->in_transition);
5319                         entry->in_transition = FALSE;
5320                         if (entry->needs_wakeup) {
5321                                 entry->needs_wakeup = FALSE;
5322                                 need_wakeup = TRUE;
5323                         }
5324                         if (rc != KERN_SUCCESS) {       /* from vm_*_wire */
5325                                 subtract_wire_counts(map, entry, user_wire);
5326                         }
5327                         entry = entry->vme_next;
5328                 }
5329
5330                 if (rc != KERN_SUCCESS) {               /* from vm_*_wire */
5331                         goto done;
5332                 }
5333
5334                 s = entry->vme_start;
5335         } /* end while loop through map entries */
5336
5337 done:
5338         if (rc == KERN_SUCCESS) {
5339                 /* repair any damage we may have made to the VM map */
5340                 vm_map_simplify_range(map, start, end);
5341         }
5342
5343         vm_map_unlock(map);
5344
5345         /*
5346          * wake up anybody waiting on entries we wired.
5347          */
5348         if (need_wakeup)
5349                 vm_map_entry_wakeup(map);
5350
5351         if (rc != KERN_SUCCESS) {
5352                 /* undo what has been wired so far */
5353                 vm_map_unwire_nested(map, start, s, user_wire,
5354                                      map_pmap, pmap_addr);
5355                 if (physpage_p) {
5356                         *physpage_p = 0;
5357                 }
5358         }
5359
5360         return rc;
5361
5362 }
5363
5364 kern_return_t
5365 vm_map_wire_external(
5366         register vm_map_t       map,
5367         register vm_map_offset_t        start,
5368         register vm_map_offset_t        end,
5369         register vm_prot_t      caller_prot,
5370         boolean_t               user_wire)
5371 {
5372         kern_return_t   kret;
5373
5374         caller_prot &= ~VM_PROT_MEMORY_TAG_MASK;
5375         caller_prot |= VM_PROT_MEMORY_TAG_MAKE(vm_tag_bt());
5376         kret = vm_map_wire_nested(map, start, end, caller_prot,
5377                                   user_wire, (pmap_t)NULL, 0, NULL);
5378         return kret;
5379 }
5380
5381 kern_return_t
5382 vm_map_wire(
5383         register vm_map_t       map,
5384         register vm_map_offset_t        start,
5385         register vm_map_offset_t        end,
5386         register vm_prot_t      caller_prot,
5387         boolean_t               user_wire)
5388 {
5389         kern_return_t   kret;
5390
5391         kret = vm_map_wire_nested(map, start, end, caller_prot,
5392                                   user_wire, (pmap_t)NULL, 0, NULL);
5393         return kret;
5394 }
5395
5396 kern_return_t
5397 vm_map_wire_and_extract_external(
5398         vm_map_t        map,
5399         vm_map_offset_t start,
5400         vm_prot_t       caller_prot,
5401         boolean_t       user_wire,
5402         ppnum_t         *physpage_p)
5403 {
5404         kern_return_t   kret;
5405
5406         caller_prot &= ~VM_PROT_MEMORY_TAG_MASK;
5407         caller_prot |= VM_PROT_MEMORY_TAG_MAKE(vm_tag_bt());
5408         kret = vm_map_wire_nested(map,
5409                                   start,
5410                                   start+VM_MAP_PAGE_SIZE(map),
5411                                   caller_prot,
5412                                   user_wire,
5413                                   (pmap_t)NULL,
5414                                   0,
5415                                   physpage_p);
5416         if (kret != KERN_SUCCESS &&
5417             physpage_p != NULL) {
5418                 *physpage_p = 0;
5419         }
5420         return kret;
5421 }
5422
5423 kern_return_t
5424 vm_map_wire_and_extract(
5425         vm_map_t        map,
5426         vm_map_offset_t start,
5427         vm_prot_t       caller_prot,
5428         boolean_t       user_wire,
5429         ppnum_t         *physpage_p)
5430 {
5431         kern_return_t   kret;
5432
5433         kret = vm_map_wire_nested(map,
5434                                   start,
5435                                   start+VM_MAP_PAGE_SIZE(map),
5436                                   caller_prot,
5437                                   user_wire,
5438                                   (pmap_t)NULL,
5439                                   0,
5440                                   physpage_p);
5441         if (kret != KERN_SUCCESS &&
5442             physpage_p != NULL) {
5443                 *physpage_p = 0;
5444         }
5445         return kret;
5446 }
5447
5448 /*
5449  *      vm_map_unwire:
5450  *
5451  *      Marks the specified address range in the target map as
5452  *      pageable.  Regions specified must have been wired previously.
5453  *
5454  *      The map must not be locked, but a reference must remain to the map
5455  *      throughout the call.
5456  *
5457  *      Kernel unwire requests panic on failure.  User unwire requests
5458  *      skip holes, unwired entries and in-transition entries, to avoid
5459  *      losing memory by leaving it unwired.
5460  */
5461 static kern_return_t
5462 vm_map_unwire_nested(
5463         register vm_map_t       map,
5464         register vm_map_offset_t        start,
5465         register vm_map_offset_t        end,
5466         boolean_t               user_wire,
5467         pmap_t                  map_pmap,
5468         vm_map_offset_t         pmap_addr)
5469 {
5470         register vm_map_entry_t entry;
5471         struct vm_map_entry     *first_entry, tmp_entry;
5472         boolean_t               need_wakeup;
5473         boolean_t               main_map = FALSE;
5474         unsigned int            last_timestamp;
5475
5476         vm_map_lock(map);
5477         if(map_pmap == NULL)
5478                 main_map = TRUE;
5479         last_timestamp = map->timestamp;
5480
5481         VM_MAP_RANGE_CHECK(map, start, end);
5482         assert(page_aligned(start));
5483         assert(page_aligned(end));
5484         assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
5485         assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
5486
5487         if (start == end) {
5488                 /* We unwired what the caller asked for: zero pages */
5489                 vm_map_unlock(map);
5490                 return KERN_SUCCESS;
5491         }
5492
5493         if (vm_map_lookup_entry(map, start, &first_entry)) {
5494                 entry = first_entry;
5495                 /*
5496                  * vm_map_clip_start will be done later.
5497                  * We don't want to unnest any nested sub maps here !
5498                  */
5499         }
5500         else {
5501                 if (!user_wire) {
5502                         panic("vm_map_unwire: start not found");
5503                 }
5504                 /*      Start address is not in map. */
5505                 vm_map_unlock(map);
5506                 return(KERN_INVALID_ADDRESS);
5507         }
5508
5509         if (entry->superpage_size) {
5510                 /* superpages are always wired */
5511                 vm_map_unlock(map);
5512                 return KERN_INVALID_ADDRESS;
5513         }
5514
5515         need_wakeup = FALSE;
5516         while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
5517                 if (entry->in_transition) {
5518                         /*
5519                          * 1)
5520                          * Another thread is wiring down this entry. Note
5521                          * that if it is not for the other thread we would
5522                          * be unwiring an unwired entry.  This is not
5523                          * permitted.  If we wait, we will be unwiring memory
5524                          * we did not wire.
5525                          *
5526                          * 2)
5527                          * Another thread is unwiring this entry.  We did not
5528                          * have a reference to it, because if we did, this
5529                          * entry will not be getting unwired now.
5530                          */
5531                         if (!user_wire) {
5532                                 /*
5533                                  * XXX FBDP
5534                                  * This could happen:  there could be some
5535                                  * overlapping vslock/vsunlock operations
5536                                  * going on.
5537                                  * We should probably just wait and retry,
5538                                  * but then we have to be careful that this
5539                                  * entry could get "simplified" after
5540                                  * "in_transition" gets unset and before
5541                                  * we re-lookup the entry, so we would
5542                                  * have to re-clip the entry to avoid
5543                                  * re-unwiring what we have already unwired...
5544                                  * See vm_map_wire_nested().
5545                                  *
5546                                  * Or we could just ignore "in_transition"
5547                                  * here and proceed to decement the wired
5548                                  * count(s) on this entry.  That should be fine
5549                                  * as long as "wired_count" doesn't drop all
5550                                  * the way to 0 (and we should panic if THAT
5551                                  * happens).
5552                                  */
5553                                 panic("vm_map_unwire: in_transition entry");
5554                         }
5555
5556                         entry = entry->vme_next;
5557                         continue;
5558                 }
5559
5560                 if (entry->is_sub_map) {
5561                         vm_map_offset_t sub_start;
5562                         vm_map_offset_t sub_end;
5563                         vm_map_offset_t local_end;
5564                         pmap_t          pmap;
5565
5566                         vm_map_clip_start(map, entry, start);
5567                         vm_map_clip_end(map, entry, end);
5568
5569                         sub_start = VME_OFFSET(entry);
5570                         sub_end = entry->vme_end - entry->vme_start;
5571                         sub_end += VME_OFFSET(entry);
5572                         local_end = entry->vme_end;
5573                         if(map_pmap == NULL) {
5574                                 if(entry->use_pmap) {
5575                                         pmap = VME_SUBMAP(entry)->pmap;
5576                                         pmap_addr = sub_start;
5577                                 } else {
5578                                         pmap = map->pmap;
5579                                         pmap_addr = start;
5580                                 }
5581                                 if (entry->wired_count == 0 ||
5582                                     (user_wire && entry->user_wired_count == 0)) {
5583                                         if (!user_wire)
5584                                                 panic("vm_map_unwire: entry is unwired");
5585                                         entry = entry->vme_next;
5586                                         continue;
5587                                 }
5588
5589                                 /*
5590                                  * Check for holes
5591                                  * Holes: Next entry should be contiguous unless
5592                                  * this is the end of the region.
5593                                  */
5594                                 if (((entry->vme_end < end) &&
5595                                      ((entry->vme_next == vm_map_to_entry(map)) ||
5596                                       (entry->vme_next->vme_start
5597                                        > entry->vme_end)))) {
5598                                         if (!user_wire)
5599                                                 panic("vm_map_unwire: non-contiguous region");
5600 /*
5601                                         entry = entry->vme_next;
5602                                         continue;
5603 */
5604                                 }
5605
5606                                 subtract_wire_counts(map, entry, user_wire);
5607
5608                                 if (entry->wired_count != 0) {
5609                                         entry = entry->vme_next;
5610                                         continue;
5611                                 }
5612
5613                                 entry->in_transition = TRUE;
5614                                 tmp_entry = *entry;/* see comment in vm_map_wire() */
5615
5616                                 /*
5617                                  * We can unlock the map now. The in_transition state
5618                                  * guarantees existance of the entry.
5619                                  */
5620                                 vm_map_unlock(map);
5621                                 vm_map_unwire_nested(VME_SUBMAP(entry),
5622                                                      sub_start, sub_end, user_wire, pmap, pmap_addr);
5623                                 vm_map_lock(map);
5624
5625                                 if (last_timestamp+1 != map->timestamp) {
5626                                         /*
5627                                          * Find the entry again.  It could have been
5628                                          * clipped or deleted after we unlocked the map.
5629                                          */
5630                                         if (!vm_map_lookup_entry(map,
5631                                                                  tmp_entry.vme_start,
5632                                                                  &first_entry)) {
5633                                                 if (!user_wire)
5634                                                         panic("vm_map_unwire: re-lookup failed");
5635                                                 entry = first_entry->vme_next;
5636                                         } else
5637                                                 entry = first_entry;
5638                                 }
5639                                 last_timestamp = map->timestamp;
5640
5641                                 /*
5642                                  * clear transition bit for all constituent entries
5643                                  * that were in the original entry (saved in
5644                                  * tmp_entry).  Also check for waiters.
5645                                  */
5646                                 while ((entry != vm_map_to_entry(map)) &&
5647                                        (entry->vme_start < tmp_entry.vme_end)) {
5648                                         assert(entry->in_transition);
5649                                         entry->in_transition = FALSE;
5650                                         if (entry->needs_wakeup) {
5651                                                 entry->needs_wakeup = FALSE;
5652                                                 need_wakeup = TRUE;
5653                                         }
5654                                         entry = entry->vme_next;
5655                                 }
5656                                 continue;
5657                         } else {
5658                                 vm_map_unlock(map);
5659                                 vm_map_unwire_nested(VME_SUBMAP(entry),
5660                                                      sub_start, sub_end, user_wire, map_pmap,
5661                                                      pmap_addr);
5662                                 vm_map_lock(map);
5663
5664                                 if (last_timestamp+1 != map->timestamp) {
5665                                         /*
5666                                          * Find the entry again.  It could have been
5667                                          * clipped or deleted after we unlocked the map.
5668                                          */
5669                                         if (!vm_map_lookup_entry(map,
5670                                                                  tmp_entry.vme_start,
5671                                                                  &first_entry)) {
5672                                                 if (!user_wire)
5673                                                         panic("vm_map_unwire: re-lookup failed");
5674                                                 entry = first_entry->vme_next;
5675                                         } else
5676                                                 entry = first_entry;
5677                                 }
5678                                 last_timestamp = map->timestamp;
5679                         }
5680                 }
5681
5682
5683                 if ((entry->wired_count == 0) ||
5684                     (user_wire && entry->user_wired_count == 0)) {
5685                         if (!user_wire)
5686                                 panic("vm_map_unwire: entry is unwired");
5687
5688                         entry = entry->vme_next;
5689                         continue;
5690                 }
5691
5692                 assert(entry->wired_count > 0 &&
5693                        (!user_wire || entry->user_wired_count > 0));
5694
5695                 vm_map_clip_start(map, entry, start);
5696                 vm_map_clip_end(map, entry, end);
5697
5698                 /*
5699                  * Check for holes
5700                  * Holes: Next entry should be contiguous unless
5701                  *        this is the end of the region.
5702                  */
5703                 if (((entry->vme_end < end) &&
5704                      ((entry->vme_next == vm_map_to_entry(map)) ||
5705                       (entry->vme_next->vme_start > entry->vme_end)))) {
5706
5707                         if (!user_wire)
5708                                 panic("vm_map_unwire: non-contiguous region");
5709                         entry = entry->vme_next;
5710                         continue;
5711                 }
5712
5713                 subtract_wire_counts(map, entry, user_wire);
5714
5715                 if (entry->wired_count != 0) {
5716                         entry = entry->vme_next;
5717                         continue;
5718                 }
5719
5720                 if(entry->zero_wired_pages) {
5721                         entry->zero_wired_pages = FALSE;
5722                 }
5723
5724                 entry->in_transition = TRUE;
5725                 tmp_entry = *entry;     /* see comment in vm_map_wire() */
5726
5727                 /*
5728                  * We can unlock the map now. The in_transition state
5729                  * guarantees existence of the entry.
5730                  */
5731                 vm_map_unlock(map);
5732                 if(map_pmap) {
5733                         vm_fault_unwire(map,
5734                                         &tmp_entry, FALSE, map_pmap, pmap_addr);
5735                 } else {
5736                         vm_fault_unwire(map,
5737                                         &tmp_entry, FALSE, map->pmap,
5738                                         tmp_entry.vme_start);
5739                 }
5740                 vm_map_lock(map);
5741
5742                 if (last_timestamp+1 != map->timestamp) {
5743                         /*
5744                          * Find the entry again.  It could have been clipped
5745                          * or deleted after we unlocked the map.
5746                          */
5747                         if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
5748                                                  &first_entry)) {
5749                                 if (!user_wire)
5750                                         panic("vm_map_unwire: re-lookup failed");
5751                                 entry = first_entry->vme_next;
5752                         } else
5753                                 entry = first_entry;
5754                 }
5755                 last_timestamp = map->timestamp;
5756
5757                 /*
5758                  * clear transition bit for all constituent entries that
5759                  * were in the original entry (saved in tmp_entry).  Also
5760                  * check for waiters.
5761                  */
5762                 while ((entry != vm_map_to_entry(map)) &&
5763                        (entry->vme_start < tmp_entry.vme_end)) {
5764                         assert(entry->in_transition);
5765                         entry->in_transition = FALSE;
5766                         if (entry->needs_wakeup) {
5767                                 entry->needs_wakeup = FALSE;
5768                                 need_wakeup = TRUE;
5769                         }
5770                         entry = entry->vme_next;
5771                 }
5772         }
5773
5774         /*
5775          * We might have fragmented the address space when we wired this
5776          * range of addresses.  Attempt to re-coalesce these VM map entries
5777          * with their neighbors now that they're no longer wired.
5778          * Under some circumstances, address space fragmentation can
5779          * prevent VM object shadow chain collapsing, which can cause
5780          * swap space leaks.
5781          */
5782         vm_map_simplify_range(map, start, end);
5783
5784         vm_map_unlock(map);
5785         /*
5786          * wake up anybody waiting on entries that we have unwired.
5787          */
5788         if (need_wakeup)
5789                 vm_map_entry_wakeup(map);
5790         return(KERN_SUCCESS);
5791
5792 }
5793 /*
      *      vm_map_unwire:
      *
      *      Unwire the address range "start".."end" of "map".  This is a
      *      thin wrapper that forwards to vm_map_unwire_nested() with a
      *      NULL pmap and a pmap address of 0.
      *
      *      map:        map whose range is to be unwired.
      *      start, end: bounds of the range, passed through unchanged.
      *      user_wire:  passed through unchanged.  vm_map_unwire_nested()
      *                  panics on inconsistencies (holes in the range,
      *                  entries that are not wired, failed re-lookups)
      *                  only when this is FALSE.
      *
      *      Returns whatever vm_map_unwire_nested() returns.
      */
5794 kern_return_t
5795 vm_map_unwire(
5796         register vm_map_t       map,
5797         register vm_map_offset_t        start,
5798         register vm_map_offset_t        end,
5799         boolean_t               user_wire)
5800 {
             /*
              * No pmap is supplied here.  NOTE(review): the NULL is
              * presumably received as vm_map_unwire_nested()'s map_pmap
              * argument, in which case it unwires against map->pmap at
              * each entry's own start address -- confirm against its
              * prototype.
              */
5801         return vm_map_unwire_nested(map, start, end,
5802                                     user_wire, (pmap_t)NULL, 0);
5803 }
5804
5805
5806 /*
5807  *      vm_map_entry_delete:    [ internal use only ]
5808  *
5809  *      Deallocate the given entry from the target map.
      *
      *      The entry is unlinked from the map's entry store, its span
      *      (vme_end - vme_start) is subtracted from map->size, and the
      *      entry is handed to vm_map_entry_dispose().  The map is then
      *      unlocked and whatever backed the entry (a submap or a VM
      *      object) is deallocated.
      *
      *      map:    map that owns "entry".  This routine calls
      *              vm_map_unlock(map) and does not lock it again, so it
      *              returns with the map unlocked.  Presumably the caller
      *              enters with the map locked -- confirm at call sites.
      *      entry:  entry to remove.  Asserted to be page-aligned (and
      *              map-page-aligned when entry->map_aligned is set), to
      *              have wired_count and user_wired_count both zero, and
      *              not to be marked permanent.  It is passed to
      *              vm_map_entry_dispose(), so callers should presumably
      *              treat it as gone afterwards.
5810  */
5811 static void
5812 vm_map_entry_delete(
5813         register vm_map_t       map,
5814         register vm_map_entry_t entry)
5815 {
5816         register vm_map_offset_t        s, e;
5817         register vm_object_t    object;
5818         register vm_map_t       submap;
5819         /* Capture the bounds now; "entry" is disposed of further down. */
5820         s = entry->vme_start;
5821         e = entry->vme_end;
5822         assert(page_aligned(s));
5823         assert(page_aligned(e));
5824         if (entry->map_aligned == TRUE) {
5825                 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
5826                 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
5827         }
5828         assert(entry->wired_count == 0);
5829         assert(entry->user_wired_count == 0);
5830         assert(!entry->permanent);
5831         /* Remember what backs the entry so it can be deallocated at the end. */
5832         if (entry->is_sub_map) {
5833                 object = NULL;
5834                 submap = VME_SUBMAP(entry);
5835         } else {
5836                 submap = NULL;
5837                 object = VME_OBJECT(entry);
5838         }
5839         /* Take the entry out of the map and account for the range it covered. */
5840         vm_map_store_entry_unlink(map, entry);
5841         map->size -= e - s;
5842
5843         vm_map_entry_dispose(map, entry);
5844         /* The map lock is dropped before the backing submap/object is deallocated. */
5845         vm_map_unlock(map);
5846         /*
5847          *      Deallocate the object only after removing all
5848          *      pmap entries pointing to its pages.
5849          */
5850         if (submap)
5851                 vm_map_deallocate(submap);
5852         else
5853                 vm_object_deallocate(object);
5854
5855 }
5856 /*
      *      vm_map_submap_pmap_clean:
      *
      *      Remove the physical mappings for the part of "sub_map" that
      *      "map" maps between "start" and "end".  "offset" is the address
      *      within "sub_map" that corresponds to "start", so the range
      *      walked in "sub_map" runs from "offset" up to
      *      offset + (end - start).
      *
      *      "sub_map" is read-locked for the duration of the walk.  For
      *      each sub_map entry overlapping that range:
      *        - a nested submap entry is handled by recursing, with
      *          "sub_map" passed as the "map" argument;
      *        - otherwise, if map->mapped_in_other_pmaps and
      *          map->ref_count are both non-zero and the entry has a VM
      *          object, vm_object_pmap_protect_options() is called on
      *          that object with PMAP_NULL, VM_PROT_NONE and
      *          PMAP_OPTIONS_REMOVE;
      *        - otherwise pmap_remove() is called on map->pmap for the
      *          corresponding addresses in "map".
      *
      *      map:      map containing the submap mapping.
      *      start:    address in "map" where the range to clean begins.
      *      end:      address in "map" where the range to clean ends.
      *      sub_map:  submap whose entries are walked.
      *      offset:   address in "sub_map" corresponding to "start".
      */
5857 void
5858 vm_map_submap_pmap_clean(
5859         vm_map_t        map,
5860         vm_map_offset_t start,
5861         vm_map_offset_t end,
5862         vm_map_t        sub_map,
5863         vm_map_offset_t offset)
5864 {
5865         vm_map_offset_t submap_start;
5866         vm_map_offset_t submap_end;
5867         vm_map_size_t   remove_size;
5868         vm_map_entry_t  entry;
5869
             /*
              * Express the end of the range as a sub_map address.
              * NOTE(review): submap_start is assigned below but never
              * read anywhere in this function.
              */
5870         submap_end = offset + (end - start);
5871         submap_start = offset;
5872
5873         vm_map_lock_read(sub_map);
             /*
              * First entry: the one containing "offset", if the lookup
              * finds one.  Its size is trimmed at the front when "offset"
              * lies past its start, and at the back when it extends
              * beyond submap_end.
              */
5874         if(vm_map_lookup_entry(sub_map, offset, &entry)) {
5875
5876                 remove_size = (entry->vme_end - entry->vme_start);
5877                 if(offset > entry->vme_start)
5878                         remove_size -= offset - entry->vme_start;
5879
5880
5881                 if(submap_end < entry->vme_end) {
5882                         remove_size -=
5883                                 entry->vme_end - submap_end;
5884                 }
5885                 if(entry->is_sub_map) {
                             /*
                              * NOTE(review): the nested offset passed here is
                              * VME_OFFSET(entry) alone, whereas the object
                              * path below adds (offset - entry->vme_start).
                              * Confirm this is intended when "offset" falls
                              * inside the entry.
                              */
5886                         vm_map_submap_pmap_clean(
5887                                 sub_map,
5888                                 start,
5889                                 start + remove_size,
5890                                 VME_SUBMAP(entry),
5891                                 VME_OFFSET(entry));
5892                 } else {
5893
5894                         if((map->mapped_in_other_pmaps) && (map->ref_count)
5895                            && (VME_OBJECT(entry) != NULL)) {
                                     /*
                                      * Object offset is the entry's offset
                                      * advanced by how far "offset" lies into
                                      * the entry.
                                      */
5896                                 vm_object_pmap_protect_options(
5897                                         VME_OBJECT(entry),
5898                                         (VME_OFFSET(entry) +
5899                                          offset -
5900                                          entry->vme_start),
5901                                         remove_size,
5902                                         PMAP_NULL,
5903                                         entry->vme_start,
5904                                         VM_PROT_NONE,
5905                                         PMAP_OPTIONS_REMOVE);
5906                         } else {
5907                                 pmap_remove(map->pmap,
5908                                             (addr64_t)start,
5909                                             (addr64_t)(start + remove_size));
5910                         }
5911                 }
5912         }
5913
             /*
              * Continue with the entry after the one the lookup left in
              * "entry".  NOTE(review): when the lookup fails, "entry" is
              * presumably the entry preceding "offset" (or the map
              * header) -- confirm against vm_map_lookup_entry().
              */
5914         entry = entry->vme_next;
5915
             /*
              * Remaining entries that start before submap_end.  Each is
              * cleaned from its own start; only its tail is trimmed
              * against submap_end.  The matching address in "map" is
              * (start + entry->vme_start) - offset.
              */
5916         while((entry != vm_map_to_entry(sub_map))
5917               && (entry->vme_start < submap_end)) {
5918                 remove_size = (entry->vme_end - entry->vme_start);
5919                 if(submap_end < entry->vme_end) {
5920                         remove_size -= entry->vme_end - submap_end;
5921                 }
5922                 if(entry->is_sub_map) {
5923                         vm_map_submap_pmap_clean(
5924                                 sub_map,
5925                                 (start + entry->vme_start) - offset,
5926                                 ((start + entry->vme_start) - offset) + remove_size,
5927                                 VME_SUBMAP(entry),
5928                                 VME_OFFSET(entry));
5929                 } else {
5930                         if((map->mapped_in_other_pmaps) && (map->ref_count)
5931                            && (VME_OBJECT(entry) != NULL)) {
5932                                 vm_object_pmap_protect_options(
5933                                         VME_OBJECT(entry),
5934                                         VME_OFFSET(entry),
5935                                         remove_size,
5936                                         PMAP_NULL,
5937                                         entry->vme_start,
5938                                         VM_PROT_NONE,
5939                                         PMAP_OPTIONS_REMOVE);
5940                         } else {
5941                                 pmap_remove(map->pmap,
5942                                             (addr64_t)((start + entry->vme_start)
5943                                                        - offset),
5944                                             (addr64_t)(((start + entry->vme_start)
5945                                                         - offset) + remove_size));
5946                         }
5947                 }
5948                 entry = entry->vme_next;
5949         }
5950         vm_map_unlock_read(sub_map);
5951         return;
5952 }
5953
5954 /*
5955  *      vm_map_delete:  [ internal use only ]
5956  *
5957  *      Deallocates the given address range from the target map.
5958  *      Removes all user wirings. Unwires one kernel wiring if
5959  *      VM_MAP_REMOVE_KUNWIRE is set.  Waits for kernel wirings to go
5960  *      away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set.  Sleeps
5961  *      interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
5962  *
5963  *      This routine is called with map locked and leaves map locked.
5964  */
5965 static kern_return_t
5966 vm_map_delete(
5967         vm_map_t                map,
5968         vm_map_offset_t         start,
5969         vm_map_offset_t         end,
5970         int                     flags,
5971         vm_map_t                zap_map)
5972 {
5973         vm_map_entry_t          entry, next;
5974         struct   vm_map_entry   *first_entry, tmp_entry;
5975         register vm_map_offset_t s;
5976         register vm_object_t    object;
5977         boolean_t               need_wakeup;
5978         unsigned int            last_timestamp = ~0; /* unlikely value */
5979         int                     interruptible;
5980
5981         interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
5982                 THREAD_ABORTSAFE : THREAD_UNINT;
5983
5984         /*
5985          * All our DMA I/O operations in IOKit are currently done by
5986          * wiring through the map entries of the task requesting the I/O.
5987          * Because of this, we must always wait for kernel wirings
5988          * to go away on the entries before deleting them.
5989          *
5990          * Any caller who wants to actually remove a kernel wiring
5991          * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
5992          * properly remove one wiring instead of blasting through
5993          * them all.
5994          */
5995         flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
5996
5997         while(1) {
5998                 /*
5999                  *      Find the start of the region, and clip it
6000                  */
6001                 if (vm_map_lookup_entry(map, start, &first_entry)) {
6002                         entry = first_entry;
6003                         if (map == kalloc_map &&
6004                             (entry->vme_start != start ||
6005                              entry->vme_end != end)) {
6006                                 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6007                                       "mismatched entry %p [0x%llx:0x%llx]\n",
6008                                       map,
6009                                       (uint64_t)start,
6010                                       (uint64_t)end,
6011                                       entry,
6012                                       (uint64_t)entry->vme_start,
6013                                       (uint64_t)entry->vme_end);
6014                         }
6015                         if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */                           start = SUPERPAGE_ROUND_DOWN(start);
6016                                 start = SUPERPAGE_ROUND_DOWN(start);
6017                                 continue;
6018                         }
6019                         if (start == entry->vme_start) {
6020                                 /*
6021                                  * No need to clip.  We don't want to cause
6022                                  * any unnecessary unnesting in this case...
6023                                  */
6024                         } else {
6025                                 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
6026                                     entry->map_aligned &&
6027                                     !VM_MAP_PAGE_ALIGNED(
6028                                             start,
6029                                             VM_MAP_PAGE_MASK(map))) {
6030                                         /*
6031                                          * The entry will no longer be
6032                                          * map-aligned after clipping
6033                                          * and the caller said it's OK.
6034                                          */
6035                                         entry->map_aligned = FALSE;
6036                                 }
6037                                 if (map == kalloc_map) {
6038                                         panic("vm_map_delete(%p,0x%llx,0x%llx):"
6039                                               " clipping %p at 0x%llx\n",
6040                                               map,
6041                                               (uint64_t)start,
6042                                               (uint64_t)end,
6043                                               entry,
6044                                               (uint64_t)start);
6045                                 }
6046                                 vm_map_clip_start(map, entry, start);
6047                         }
6048
6049                         /*
6050                          *      Fix the lookup hint now, rather than each
6051                          *      time through the loop.
6052                          */
6053                         SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
6054                 } else {
6055                         if (map->pmap == kernel_pmap &&
6056                             map->ref_count != 0) {
6057                                 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6058                                       "no map entry at 0x%llx\n",
6059                                       map,
6060                                       (uint64_t)start,
6061                                       (uint64_t)end,
6062                                       (uint64_t)start);
6063                         }
6064                         entry = first_entry->vme_next;
6065                 }
6066                 break;
6067         }
6068         if (entry->superpage_size)
6069                 end = SUPERPAGE_ROUND_UP(end);
6070
6071         need_wakeup = FALSE;
6072         /*
6073          *      Step through all entries in this region
6074          */
6075         s = entry->vme_start;
6076         while ((entry != vm_map_to_entry(map)) && (s < end)) {
6077                 /*
6078                  * At this point, we have deleted all the memory entries
6079                  * between "start" and "s".  We still need to delete
6080                  * all memory entries between "s" and "end".
6081                  * While we were blocked and the map was unlocked, some
6082                  * new memory entries could have been re-allocated between
6083                  * "start" and "s" and we don't want to mess with those.
6084                  * Some of those entries could even have been re-assembled
6085                  * with an entry after "s" (in vm_map_simplify_entry()), so
6086                  * we may have to vm_map_clip_start() again.
6087                  */
6088
6089                 if (entry->vme_start >= s) {
6090                         /*
6091                          * This entry starts on or after "s"
6092                          * so no need to clip its start.
6093                          */
6094                 } else {
6095                         /*
6096                          * This entry has been re-assembled by a
6097                          * vm_map_simplify_entry().  We need to
6098                          * re-clip its start.
6099                          */
6100                         if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
6101                             entry->map_aligned &&
6102                             !VM_MAP_PAGE_ALIGNED(s,
6103                                                  VM_MAP_PAGE_MASK(map))) {
6104                                 /*
6105                                  * The entry will no longer be map-aligned
6106                                  * after clipping and the caller said it's OK.
6107                                  */
6108                                 entry->map_aligned = FALSE;
6109                         }
6110                         if (map == kalloc_map) {
6111                                 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6112                                       "clipping %p at 0x%llx\n",
6113                                       map,
6114                                       (uint64_t)start,
6115                                       (uint64_t)end,
6116                                       entry,
6117                                       (uint64_t)s);
6118                         }
6119                         vm_map_clip_start(map, entry, s);
6120                 }
6121                 if (entry->vme_end <= end) {
6122                         /*
6123                          * This entry is going away completely, so no need
6124                          * to clip and possibly cause an unnecessary unnesting.
6125                          */
6126                 } else {
6127                         if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
6128                             entry->map_aligned &&
6129                             !VM_MAP_PAGE_ALIGNED(end,
6130                                                  VM_MAP_PAGE_MASK(map))) {
6131                                 /*
6132                                  * The entry will no longer be map-aligned
6133                                  * after clipping and the caller said it's OK.
6134                                  */
6135                                 entry->map_aligned = FALSE;
6136                         }
6137                         if (map == kalloc_map) {
6138                                 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6139                                       "clipping %p at 0x%llx\n",
6140                                       map,
6141                                       (uint64_t)start,
6142                                       (uint64_t)end,
6143                                       entry,
6144                                       (uint64_t)end);
6145                         }
6146                         vm_map_clip_end(map, entry, end);
6147                 }
6148
6149                 if (entry->permanent) {
6150                         panic("attempt to remove permanent VM map entry "
6151                               "%p [0x%llx:0x%llx]\n",
6152                               entry, (uint64_t) s, (uint64_t) end);
6153                 }
6154
6155
6156                 if (entry->in_transition) {
6157                         wait_result_t wait_result;
6158
6159                         /*
6160                          * Another thread is wiring/unwiring this entry.
6161                          * Let the other thread know we are waiting.
6162                          */
6163                         assert(s == entry->vme_start);
6164                         entry->needs_wakeup = TRUE;
6165
6166                         /*
6167                          * wake up anybody waiting on entries that we have
6168                          * already unwired/deleted.
6169                          */
6170                         if (need_wakeup) {
6171                                 vm_map_entry_wakeup(map);
6172                                 need_wakeup = FALSE;
6173                         }
6174
6175                         wait_result = vm_map_entry_wait(map, interruptible);
6176
6177                         if (interruptible &&
6178                             wait_result == THREAD_INTERRUPTED) {
6179                                 /*
6180                                  * We do not clear the needs_wakeup flag,
6181                                  * since we cannot tell if we were the only one.
6182                                  */
6183                                 return KERN_ABORTED;
6184                         }
6185
6186                         /*
6187                          * The entry could have been clipped or it
6188                          * may not exist anymore.  Look it up again.
6189                          */
6190                         if (!vm_map_lookup_entry(map, s, &first_entry)) {
6191                                 /*
6192                                  * User: use the next entry
6193                                  */
6194                                 entry = first_entry->vme_next;
6195                                 s = entry->vme_start;
6196                         } else {
6197                                 entry = first_entry;
6198                                 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
6199                         }
6200                         last_timestamp = map->timestamp;
6201                         continue;
6202                 } /* end in_transition */
6203
6204                 if (entry->wired_count) {
6205                         boolean_t       user_wire;
6206
6207                         user_wire = entry->user_wired_count > 0;
6208
6209                         /*
6210                          *      Remove a kernel wiring if requested
6211                          */
6212                         if (flags & VM_MAP_REMOVE_KUNWIRE) {
6213                                 entry->wired_count--;
6214                         }
6215
6216                         /*
6217                          *      Remove all user wirings for proper accounting
6218                          */
6219                         if (entry->user_wired_count > 0) {
6220                                 while (entry->user_wired_count)
6221                                         subtract_wire_counts(map, entry, user_wire);
6222                         }
6223
6224                         if (entry->wired_count != 0) {
6225                                 assert(map != kernel_map);
6226                                 /*
6227                                  * Cannot continue.  Typical case is when
6228                                  * a user thread has physical io pending on
6229                                  * this page.  Either wait for the
6230                                  * kernel wiring to go away or return an
6231                                  * error.
6232                                  */
6233                                 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
6234                                         wait_result_t wait_result;
6235
6236                                         assert(s == entry->vme_start);
6237                                         entry->needs_wakeup = TRUE;
6238                                         wait_result = vm_map_entry_wait(map,
6239                                                                         interruptible);
6240
6241                                         if (interruptible &&
6242                                             wait_result == THREAD_INTERRUPTED) {
6243                                                 /*
6244                                                  * We do not clear the
6245                                                  * needs_wakeup flag, since we
6246                                                  * cannot tell if we were the
6247                                                  * only one.
6248                                                  */
6249                                                 return KERN_ABORTED;
6250                                         }
6251
6252                                         /*
6253                                          * The entry could have been clipped or
6254                                          * it may not exist anymore.  Look it
6255                                          * up again.
6256                                          */
6257                                         if (!vm_map_lookup_entry(map, s,
6258                                                                  &first_entry)) {
6259                                                 assert(map != kernel_map);
6260                                                 /*
6261                                                  * User: use the next entry
6262                                                  */
6263                                                 entry = first_entry->vme_next;
6264                                                 s = entry->vme_start;
6265                                         } else {
6266                                                 entry = first_entry;
6267                                                 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
6268                                         }
6269                                         last_timestamp = map->timestamp;
6270                                         continue;
6271                                 }
6272                                 else {
6273                                         return KERN_FAILURE;
6274                                 }
6275                         }
6276
6277                         entry->in_transition = TRUE;
6278                         /*
6279                          * copy current entry.  see comment in vm_map_wire()
6280                          */
6281                         tmp_entry = *entry;
6282                         assert(s == entry->vme_start);
6283
6284                         /*
6285                          * We can unlock the map now. The in_transition
6286                          * state guarantees existence of the entry.
6287                          */
6288                         vm_map_unlock(map);
6289
6290                         if (tmp_entry.is_sub_map) {
6291                                 vm_map_t sub_map;
6292                                 vm_map_offset_t sub_start, sub_end;
6293                                 pmap_t pmap;
6294                                 vm_map_offset_t pmap_addr;
6295
6296
6297                                 sub_map = VME_SUBMAP(&tmp_entry);
6298                                 sub_start = VME_OFFSET(&tmp_entry);
6299                                 sub_end = sub_start + (tmp_entry.vme_end -
6300                                                        tmp_entry.vme_start);
6301                                 if (tmp_entry.use_pmap) {
6302                                         pmap = sub_map->pmap;
6303                                         pmap_addr = tmp_entry.vme_start;
6304                                 } else {
6305                                         pmap = map->pmap;
6306                                         pmap_addr = tmp_entry.vme_start;
6307                                 }
6308                                 (void) vm_map_unwire_nested(sub_map,
6309                                                             sub_start, sub_end,
6310                                                             user_wire,
6311                                                             pmap, pmap_addr);
6312                         } else {
6313
6314                                 if (VME_OBJECT(&tmp_entry) == kernel_object) {
6315                                         pmap_protect_options(
6316                                                 map->pmap,
6317                                                 tmp_entry.vme_start,
6318                                                 tmp_entry.vme_end,
6319                                                 VM_PROT_NONE,
6320                                                 PMAP_OPTIONS_REMOVE,
6321                                                 NULL);
6322                                 }
6323                                 vm_fault_unwire(map, &tmp_entry,
6324                                                 VME_OBJECT(&tmp_entry) == kernel_object,
6325                                                 map->pmap, tmp_entry.vme_start);
6326                         }
6327
6328                         vm_map_lock(map);
6329
6330                         if (last_timestamp+1 != map->timestamp) {
6331                                 /*
6332                                  * Find the entry again.  It could have
6333                                  * been clipped after we unlocked the map.
6334                                  */
6335                                 if (!vm_map_lookup_entry(map, s, &first_entry)){
6336                                         assert((map != kernel_map) &&
6337                                                (!entry->is_sub_map));
6338                                         first_entry = first_entry->vme_next;
6339                                         s = first_entry->vme_start;
6340                                 } else {
6341                                         SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
6342                                 }
6343                         } else {
6344                                 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
6345                                 first_entry = entry;
6346                         }
6347
6348                         last_timestamp = map->timestamp;
6349
6350                         entry = first_entry;
6351                         while ((entry != vm_map_to_entry(map)) &&
6352                                (entry->vme_start < tmp_entry.vme_end)) {
6353                                 assert(entry->in_transition);
6354                                 entry->in_transition = FALSE;
6355                                 if (entry->needs_wakeup) {
6356                                         entry->needs_wakeup = FALSE;
6357                                         need_wakeup = TRUE;
6358                                 }
6359                                 entry = entry->vme_next;
6360                         }
6361                         /*
6362                          * We have unwired the entry(s).  Go back and
6363                          * delete them.
6364                          */
6365                         entry = first_entry;
6366                         continue;
6367                 }
6368
6369                 /* entry is unwired */
6370                 assert(entry->wired_count == 0);
6371                 assert(entry->user_wired_count == 0);
6372
6373                 assert(s == entry->vme_start);
6374
6375                 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
6376                         /*
6377                          * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
6378                          * vm_map_delete(), some map entries might have been
6379                          * transferred to a "zap_map", which doesn't have a
6380                          * pmap.  The original pmap has already been flushed
6381                          * in the vm_map_delete() call targeting the original
6382                          * map, but when we get to destroying the "zap_map",
6383                          * we don't have any pmap to flush, so let's just skip
6384                          * all this.
6385                          */
6386                 } else if (entry->is_sub_map) {
6387                         if (entry->use_pmap) {
6388 #ifndef NO_NESTED_PMAP
6389                                 int pmap_flags;
6390
6391                                 if (flags & VM_MAP_REMOVE_NO_UNNESTING) {
6392                                         /*
6393                                          * This is the final cleanup of the
6394                                          * address space being terminated.
6395                                          * No new mappings are expected and
6396                                          * we don't really need to unnest the
6397                                          * shared region (and lose the "global"
6398                                          * pmap mappings, if applicable).
6399                                          *
6400                                          * Tell the pmap layer that we're
6401                                          * "clean" wrt nesting.
6402                                          */
6403                                         pmap_flags = PMAP_UNNEST_CLEAN;
6404                                 } else {
6405                                         /*
6406                                          * We're unmapping part of the nested
6407                                          * shared region, so we can't keep the
6408                                          * nested pmap.
6409                                          */
6410                                         pmap_flags = 0;
6411                                 }
6412                                 pmap_unnest_options(
6413                                         map->pmap,
6414                                         (addr64_t)entry->vme_start,
6415                                         entry->vme_end - entry->vme_start,
6416                                         pmap_flags);
6417 #endif  /* NO_NESTED_PMAP */
6418                                 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
6419                                         /* clean up parent map/maps */
6420                                         vm_map_submap_pmap_clean(
6421                                                 map, entry->vme_start,
6422                                                 entry->vme_end,
6423                                                 VME_SUBMAP(entry),
6424                                                 VME_OFFSET(entry));
6425                                 }
6426                         } else {
6427                                 vm_map_submap_pmap_clean(
6428                                         map, entry->vme_start, entry->vme_end,
6429                                         VME_SUBMAP(entry),
6430                                         VME_OFFSET(entry));
6431                         }
6432                 } else if (VME_OBJECT(entry) != kernel_object &&
6433                            VME_OBJECT(entry) != compressor_object) {
6434                         object = VME_OBJECT(entry);
6435                         if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
6436                                 vm_object_pmap_protect_options(
6437                                         object, VME_OFFSET(entry),
6438                                         entry->vme_end - entry->vme_start,
6439                                         PMAP_NULL,
6440                                         entry->vme_start,
6441                                         VM_PROT_NONE,
6442                                         PMAP_OPTIONS_REMOVE);
6443                         } else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
6444                                    (map->pmap == kernel_pmap)) {
6445                                 /* Remove translations associated
6446                                  * with this range unless the entry
6447                                  * does not have an object, or
6448                                  * it's the kernel map or a descendant
6449                                  * since the platform could potentially
6450                                  * create "backdoor" mappings invisible
6451                                  * to the VM. It is expected that
6452                                  * objectless, non-kernel ranges
6453                                  * do not have such VM invisible
6454                                  * translations.
6455                                  */
6456                                 pmap_remove_options(map->pmap,
6457                                                     (addr64_t)entry->vme_start,
6458                                                     (addr64_t)entry->vme_end,
6459                                                     PMAP_OPTIONS_REMOVE);
6460                         }
6461                 }
6462
6463                 if (entry->iokit_acct) {
6464                         /* alternate accounting */
6465                         vm_map_iokit_unmapped_region(map,
6466                                                      (entry->vme_end -
6467                                                       entry->vme_start));
6468                         entry->iokit_acct = FALSE;
6469                 }
6470
6471                 /*
6472                  * All pmap mappings for this map entry must have been
6473                  * cleared by now.
6474                  */
6475 #if DEBUG
6476                 assert(vm_map_pmap_is_empty(map,
6477                                             entry->vme_start,
6478                                             entry->vme_end));
6479 #endif /* DEBUG */
6480
6481                 next = entry->vme_next;
6482
6483                 if (map->pmap == kernel_pmap &&
6484                     map->ref_count != 0 &&
6485                     entry->vme_end < end &&
6486                     (next == vm_map_to_entry(map) ||
6487                      next->vme_start != entry->vme_end)) {
6488                         panic("vm_map_delete(%p,0x%llx,0x%llx): "
6489                               "hole after %p at 0x%llx\n",
6490                               map,
6491                               (uint64_t)start,
6492                               (uint64_t)end,
6493                               entry,
6494                               (uint64_t)entry->vme_end);
6495                 }
6496
6497                 s = next->vme_start;
6498                 last_timestamp = map->timestamp;
6499
6500                 if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
6501                     zap_map != VM_MAP_NULL) {
6502                         vm_map_size_t entry_size;
6503                         /*
6504                          * The caller wants to save the affected VM map entries
6505                          * into the "zap_map".  The caller will take care of
6506                          * these entries.
6507                          */
6508                         /* unlink the entry from "map" ... */
6509                         vm_map_store_entry_unlink(map, entry);
6510                         /* ... and add it to the end of the "zap_map" */
6511                         vm_map_store_entry_link(zap_map,
6512                                           vm_map_last_entry(zap_map),
6513                                           entry);
6514                         entry_size = entry->vme_end - entry->vme_start;
6515                         map->size -= entry_size;
6516                         zap_map->size += entry_size;
6517                         /* we didn't unlock the map, so no timestamp increase */
6518                         last_timestamp--;
6519                 } else {
6520                         vm_map_entry_delete(map, entry);
6521                         /* vm_map_entry_delete unlocks the map */
6522                         vm_map_lock(map);
6523                 }
6524
6525                 entry = next;
6526
6527                 if(entry == vm_map_to_entry(map)) {
6528                         break;
6529                 }
6530                 if (last_timestamp+1 != map->timestamp) {
6531                         /*
6532                          * we are responsible for deleting everything
6533                          * from the given space, if someone has interfered
6534                          * we pick up where we left off, back fills should
6535                          * be all right for anyone except map_delete and
6536                          * we have to assume that the task has been fully
6537                          * disabled before we get here
6538                          */
6539                         if (!vm_map_lookup_entry(map, s, &entry)){
6540                                 entry = entry->vme_next;
6541                                 s = entry->vme_start;
6542                         } else {
6543                                 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
6544                         }
6545                         /*
6546                          * others can not only allocate behind us, we can
6547                          * also see coalesce while we don't have the map lock
6548                          */
6549                         if(entry == vm_map_to_entry(map)) {
6550                                 break;
6551                         }
6552                 }
6553                 last_timestamp = map->timestamp;
6554         }
6555
6556         if (map->wait_for_space)
6557                 thread_wakeup((event_t) map);
6558         /*
6559          * wake up anybody waiting on entries that we have already deleted.
6560          */
6561         if (need_wakeup)
6562                 vm_map_entry_wakeup(map);
6563
6564         return KERN_SUCCESS;
6565 }
6566
6567 /*
6568  *      vm_map_remove:
6569  *
6570  *      Remove the given address range from the target map.
6571  *      This is the exported form of vm_map_delete.
6572  */
6573 kern_return_t
6574 vm_map_remove(
6575         register vm_map_t       map,
6576         register vm_map_offset_t        start,
6577         register vm_map_offset_t        end,
6578         register boolean_t      flags)
6579 {
6580         register kern_return_t  result;
6581
6582         vm_map_lock(map);
6583         VM_MAP_RANGE_CHECK(map, start, end);
6584         /*
6585          * For the zone_map, the kernel controls the allocation/freeing of memory.
6586          * Any free to the zone_map should be within the bounds of the map and
6587          * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
6588          * free to the zone_map into a no-op, there is a problem and we should
6589          * panic.
6590          */
6591         if ((map == zone_map) && (start == end))
6592                 panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start);
6593         result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
6594         vm_map_unlock(map);
6595
6596         return(result);
6597 }
6598
6599
6600 /*
6601  *      Routine:        vm_map_copy_discard
6602  *
6603  *      Description:
6604  *              Dispose of a map copy object (returned by
6605  *              vm_map_copyin).
6606  */
6607 void
6608 vm_map_copy_discard(
6609         vm_map_copy_t   copy)
6610 {
6611         if (copy == VM_MAP_COPY_NULL)
6612                 return;
6613
6614         switch (copy->type) {
6615         case VM_MAP_COPY_ENTRY_LIST:
6616                 while (vm_map_copy_first_entry(copy) !=
6617                        vm_map_copy_to_entry(copy)) {
6618                         vm_map_entry_t  entry = vm_map_copy_first_entry(copy);
6619
6620                         vm_map_copy_entry_unlink(copy, entry);
6621                         if (entry->is_sub_map) {
6622                                 vm_map_deallocate(VME_SUBMAP(entry));
6623                         } else {
6624                                 vm_object_deallocate(VME_OBJECT(entry));
6625                         }
6626                         vm_map_copy_entry_dispose(copy, entry);
6627                 }
6628                 break;
6629         case VM_MAP_COPY_OBJECT:
6630                 vm_object_deallocate(copy->cpy_object);
6631                 break;
6632         case VM_MAP_COPY_KERNEL_BUFFER:
6633
6634                 /*
6635                  * The vm_map_copy_t and possibly the data buffer were
6636                  * allocated by a single call to kalloc(), i.e. the
6637                  * vm_map_copy_t was not allocated out of the zone.
6638                  */
6639                 if (copy->size > msg_ool_size_small || copy->offset)
6640                         panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
6641                               (long long)copy->size, (long long)copy->offset);
6642                 kfree(copy, copy->size + cpy_kdata_hdr_sz);
6643                 return;
6644         }
6645         zfree(vm_map_copy_zone, copy);
6646 }
6647
6648 /*
6649  *      Routine:        vm_map_copy_copy
6650  *
6651  *      Description:
6652  *                      Move the information in a map copy object to
6653  *                      a new map copy object, leaving the old one
6654  *                      empty.
6655  *
6656  *                      This is used by kernel routines that need
6657  *                      to look at out-of-line data (in copyin form)
6658  *                      before deciding whether to return SUCCESS.
6659  *                      If the routine returns FAILURE, the original
6660  *                      copy object will be deallocated; therefore,
6661  *                      these routines must make a copy of the copy
6662  *                      object and leave the original empty so that
6663  *                      deallocation will not fail.
6664  */
6665 vm_map_copy_t
6666 vm_map_copy_copy(
6667         vm_map_copy_t   copy)
6668 {
6669         vm_map_copy_t   new_copy;
6670
6671         if (copy == VM_MAP_COPY_NULL)
6672                 return VM_MAP_COPY_NULL;
6673
6674         /*
6675          * Allocate a new copy object, and copy the information
6676          * from the old one into it.
6677          */
6678
6679         new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6680         new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
6681         *new_copy = *copy;
6682
6683         if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
6684                 /*
6685                  * The links in the entry chain must be
6686                  * changed to point to the new copy object.
6687                  */
6688                 vm_map_copy_first_entry(copy)->vme_prev
6689                         = vm_map_copy_to_entry(new_copy);
6690                 vm_map_copy_last_entry(copy)->vme_next
6691                         = vm_map_copy_to_entry(new_copy);
6692         }
6693
6694         /*
6695          * Change the old copy object into one that contains
6696          * nothing to be deallocated.
6697          */
6698         copy->type = VM_MAP_COPY_OBJECT;
6699         copy->cpy_object = VM_OBJECT_NULL;
6700
6701         /*
6702          * Return the new object.
6703          */
6704         return new_copy;
6705 }
6706
6707 static kern_return_t
6708 vm_map_overwrite_submap_recurse(
6709         vm_map_t        dst_map,
6710         vm_map_offset_t dst_addr,
6711         vm_map_size_t   dst_size)
6712 {
6713         vm_map_offset_t dst_end;
6714         vm_map_entry_t  tmp_entry;
6715         vm_map_entry_t  entry;
6716         kern_return_t   result;
6717         boolean_t       encountered_sub_map = FALSE;
6718
6719
6720
6721         /*
6722          *      Verify that the destination is all writeable
6723          *      initially.  We have to trunc the destination
6724          *      address and round the copy size or we'll end up
6725          *      splitting entries in strange ways.
6726          */
6727
6728         dst_end = vm_map_round_page(dst_addr + dst_size,
6729                                     VM_MAP_PAGE_MASK(dst_map));
6730         vm_map_lock(dst_map);
6731
6732 start_pass_1:
6733         if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
6734                 vm_map_unlock(dst_map);
6735                 return(KERN_INVALID_ADDRESS);
6736         }
6737
6738         vm_map_clip_start(dst_map,
6739                           tmp_entry,
6740                           vm_map_trunc_page(dst_addr,
6741                                             VM_MAP_PAGE_MASK(dst_map)));
6742         if (tmp_entry->is_sub_map) {
6743                 /* clipping did unnest if needed */
6744                 assert(!tmp_entry->use_pmap);
6745         }
6746
6747         for (entry = tmp_entry;;) {
6748                 vm_map_entry_t  next;
6749
6750                 next = entry->vme_next;
6751                 while(entry->is_sub_map) {
6752                         vm_map_offset_t sub_start;
6753                         vm_map_offset_t sub_end;
6754                         vm_map_offset_t local_end;
6755
6756                         if (entry->in_transition) {
6757                                 /*
6758                                  * Say that we are waiting, and wait for entry.
6759                                  */
6760                                 entry->needs_wakeup = TRUE;
6761                                 vm_map_entry_wait(dst_map, THREAD_UNINT);
6762
6763                                 goto start_pass_1;
6764                         }
6765
6766                         encountered_sub_map = TRUE;
6767                         sub_start = VME_OFFSET(entry);
6768
6769                         if(entry->vme_end < dst_end)
6770                                 sub_end = entry->vme_end;
6771                         else
6772                                 sub_end = dst_end;
6773                         sub_end -= entry->vme_start;
6774                         sub_end += VME_OFFSET(entry);
6775                         local_end = entry->vme_end;
6776                         vm_map_unlock(dst_map);
6777
6778                         result = vm_map_overwrite_submap_recurse(
6779                                 VME_SUBMAP(entry),
6780                                 sub_start,
6781                                 sub_end - sub_start);
6782
6783                         if(result != KERN_SUCCESS)
6784                                 return result;
6785                         if (dst_end <= entry->vme_end)
6786                                 return KERN_SUCCESS;
6787                         vm_map_lock(dst_map);
6788                         if(!vm_map_lookup_entry(dst_map, local_end,
6789                                                 &tmp_entry)) {
6790                                 vm_map_unlock(dst_map);
6791                                 return(KERN_INVALID_ADDRESS);
6792                         }
6793                         entry = tmp_entry;
6794                         next = entry->vme_next;
6795                 }
6796
6797                 if ( ! (entry->protection & VM_PROT_WRITE)) {
6798                         vm_map_unlock(dst_map);
6799                         return(KERN_PROTECTION_FAILURE);
6800                 }
6801
6802                 /*
6803                  *      If the entry is in transition, we must wait
6804                  *      for it to exit that state.  Anything could happen
6805                  *      when we unlock the map, so start over.
6806                  */
6807                 if (entry->in_transition) {
6808
6809                         /*
6810                          * Say that we are waiting, and wait for entry.
6811                          */
6812                         entry->needs_wakeup = TRUE;
6813                         vm_map_entry_wait(dst_map, THREAD_UNINT);
6814
6815                         goto start_pass_1;
6816                 }
6817
6818 /*
6819  *              our range is contained completely within this map entry
6820  */
6821                 if (dst_end <= entry->vme_end) {
6822                         vm_map_unlock(dst_map);
6823                         return KERN_SUCCESS;
6824                 }
6825 /*
6826  *              check that range specified is contiguous region
6827  */
6828                 if ((next == vm_map_to_entry(dst_map)) ||
6829                     (next->vme_start != entry->vme_end)) {
6830                         vm_map_unlock(dst_map);
6831                         return(KERN_INVALID_ADDRESS);
6832                 }
6833
6834                 /*
6835                  *      Check for permanent objects in the destination.
6836                  */
6837                 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
6838                     ((!VME_OBJECT(entry)->internal) ||
6839                      (VME_OBJECT(entry)->true_share))) {
6840                         if(encountered_sub_map) {
6841                                 vm_map_unlock(dst_map);
6842                                 return(KERN_FAILURE);
6843                         }
6844                 }
6845
6846
6847                 entry = next;
6848         }/* for */
6849         vm_map_unlock(dst_map);
6850         return(KERN_SUCCESS);
6851 }
6852
6853 /*
6854  *      Routine:        vm_map_copy_overwrite
6855  *
6856  *      Description:
6857  *              Copy the memory described by the map copy
6858  *              object (copy; returned by vm_map_copyin) onto
6859  *              the specified destination region (dst_map, dst_addr).
6860  *              The destination must be writeable.
6861  *
6862  *              Unlike vm_map_copyout, this routine actually
6863  *              writes over previously-mapped memory.  If the
6864  *              previous mapping was to a permanent (user-supplied)
6865  *              memory object, it is preserved.
6866  *
6867  *              The attributes (protection and inheritance) of the
6868  *              destination region are preserved.
6869  *
6870  *              If successful, consumes the copy object.
6871  *              Otherwise, the caller is responsible for it.
6872  *
6873  *      Implementation notes:
6874  *              To overwrite aligned temporary virtual memory, it is
6875  *              sufficient to remove the previous mapping and insert
6876  *              the new copy.  This replacement is done either on
6877  *              the whole region (if no permanent virtual memory
6878  *              objects are embedded in the destination region) or
6879  *              in individual map entries.
6880  *
6881  *              To overwrite permanent virtual memory, it is necessary
6882  *              to copy each page, as the external memory management
6883  *              interface currently does not provide any optimizations.
6884  *
6885  *              Unaligned memory also has to be copied.  It is possible
6886  *              to use 'vm_trickery' to copy the aligned data.  This is
6887  *              not done but not hard to implement.
6888  *
6889  *              Once a page of permanent memory has been overwritten,
6890  *              it is impossible to interrupt this function; otherwise,
6891  *              the call would be neither atomic nor location-independent.
6892  *              The kernel-state portion of a user thread must be
6893  *              interruptible.
6894  *
6895  *              It may be expensive to forward all requests that might
6896  *              overwrite permanent memory (vm_write, vm_copy) to
6897  *              uninterruptible kernel threads.  This routine may be
6898  *              called by interruptible threads; however, success is
6899  *              not guaranteed -- if the request cannot be performed
6900  *              atomically and interruptibly, an error indication is
6901  *              returned.
6902  */
6903
6904 static kern_return_t
6905 vm_map_copy_overwrite_nested(
6906         vm_map_t                dst_map,                /* map whose range is overwritten */
6907         vm_map_address_t        dst_addr,               /* start of the range in dst_map */
6908         vm_map_copy_t           copy,                   /* source data; may be VM_MAP_COPY_NULL */
6909         boolean_t               interruptible,          /* if TRUE, fail when dst has permanent objects */
6910         pmap_t                  pmap,                   /* pmap for aligned overwrite; NULL => dst_map->pmap */
6911         boolean_t               discard_on_success)     /* discard "copy" once it has been written */
6912 {
6913         vm_map_offset_t         dst_end;
6914         vm_map_entry_t          tmp_entry;
6915         vm_map_entry_t          entry;
6916         kern_return_t           kr;
6917         boolean_t               aligned = TRUE;
6918         boolean_t               contains_permanent_objects = FALSE;
6919         boolean_t               encountered_sub_map = FALSE;
6920         vm_map_offset_t         base_addr;
6921         vm_map_size_t           copy_size;
6922         vm_map_size_t           total_size;
6923
6924
6925         /*
6926          *      Check for null copy object.
6927          */
6928
6929         if (copy == VM_MAP_COPY_NULL)
6930                 return(KERN_SUCCESS);
6931
6932         /*
6933          *      Check for special kernel buffer allocated
6934          *      by new_ipc_kmsg_copyin.
6935          */
6936
6937         if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
6938                 return(vm_map_copyout_kernel_buffer(
6939                                dst_map, &dst_addr,
6940                                copy, TRUE, discard_on_success));
6941         }
6942
6943         /*
6944          *      Only works for entry lists at the moment.  Will
6945          *      support page lists later.
6946          */
6947
6948         assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
6949
6950         if (copy->size == 0) {
6951                 if (discard_on_success)
6952                         vm_map_copy_discard(copy);
6953                 return(KERN_SUCCESS);
6954         }
6955
6956         /*
6957          *      Verify that the destination is all writeable
6958          *      initially.  We have to trunc the destination
6959          *      address and round the copy size or we'll end up
6960          *      splitting entries in strange ways.
6961          */
6962
6963         if (!VM_MAP_PAGE_ALIGNED(copy->size,
6964                                  VM_MAP_PAGE_MASK(dst_map)) ||
6965             !VM_MAP_PAGE_ALIGNED(copy->offset,
6966                                  VM_MAP_PAGE_MASK(dst_map)) ||
6967             !VM_MAP_PAGE_ALIGNED(dst_addr,
6968                                  VM_MAP_PAGE_MASK(dst_map)))
6969         {
6970                 aligned = FALSE;
6971                 dst_end = vm_map_round_page(dst_addr + copy->size,
6972                                             VM_MAP_PAGE_MASK(dst_map));
6973         } else {
6974                 dst_end = dst_addr + copy->size;
6975         }
6976
6977         vm_map_lock(dst_map);
6978
6979         /* LP64todo - remove this check when vm_map_commpage64()
6980          * no longer has to stuff in a map_entry for the commpage
6981          * above the map's max_offset.
6982          */
6983         if (dst_addr >= dst_map->max_offset) {
6984                 vm_map_unlock(dst_map);
6985                 return(KERN_INVALID_ADDRESS);
6986         }
6987
6988 start_pass_1:
6989         if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
6990                 vm_map_unlock(dst_map);
6991                 return(KERN_INVALID_ADDRESS);
6992         }
6993         vm_map_clip_start(dst_map,
6994                           tmp_entry,
6995                           vm_map_trunc_page(dst_addr,
6996                                             VM_MAP_PAGE_MASK(dst_map)));
6997         for (entry = tmp_entry;;) {
6998                 vm_map_entry_t  next = entry->vme_next;
6999
7000                 while(entry->is_sub_map) {
7001                         vm_map_offset_t sub_start;
7002                         vm_map_offset_t sub_end;
7003                         vm_map_offset_t local_end;
7004
7005                         if (entry->in_transition) {
7006
7007                                 /*
7008                                  * Say that we are waiting, and wait for entry.
7009                                  */
7010                                 entry->needs_wakeup = TRUE;
7011                                 vm_map_entry_wait(dst_map, THREAD_UNINT);
7012
7013                                 goto start_pass_1;
7014                         }
7015
7016                         local_end = entry->vme_end;
7017                         if (!(entry->needs_copy)) {
7018                                 /* if needs_copy we are a COW submap */
7019                                 /* in such a case we just replace so */
7020                                 /* there is no need for the follow-  */
7021                                 /* ing check.                        */
7022                                 encountered_sub_map = TRUE;
7023                                 sub_start = VME_OFFSET(entry);
7024
7025                                 if(entry->vme_end < dst_end)
7026                                         sub_end = entry->vme_end;
7027                                 else
7028                                         sub_end = dst_end;
7029                                 sub_end -= entry->vme_start;
7030                                 sub_end += VME_OFFSET(entry);
7031                                 vm_map_unlock(dst_map);
7032
7033                                 kr = vm_map_overwrite_submap_recurse(
7034                                         VME_SUBMAP(entry),
7035                                         sub_start,
7036                                         sub_end - sub_start);
7037                                 if(kr != KERN_SUCCESS)
7038                                         return kr;
7039                                 vm_map_lock(dst_map);
7040                         }
7041
7042                         if (dst_end <= local_end)       /* use saved end: "entry" may be stale after relock */
7043                                 goto start_overwrite;
7044                         if(!vm_map_lookup_entry(dst_map, local_end,
7045                                                 &entry)) {
7046                                 vm_map_unlock(dst_map);
7047                                 return(KERN_INVALID_ADDRESS);
7048                         }
7049                         next = entry->vme_next;
7050                 }
7051
7052                 if ( ! (entry->protection & VM_PROT_WRITE)) {
7053                         vm_map_unlock(dst_map);
7054                         return(KERN_PROTECTION_FAILURE);
7055                 }
7056
7057                 /*
7058                  *      If the entry is in transition, we must wait
7059                  *      for it to exit that state.  Anything could happen
7060                  *      when we unlock the map, so start over.
7061                  */
7062                 if (entry->in_transition) {
7063
7064                         /*
7065                          * Say that we are waiting, and wait for entry.
7066                          */
7067                         entry->needs_wakeup = TRUE;
7068                         vm_map_entry_wait(dst_map, THREAD_UNINT);
7069
7070                         goto start_pass_1;
7071                 }
7072
7073 /*
7074  *              our range is contained completely within this map entry
7075  */
7076                 if (dst_end <= entry->vme_end)
7077                         break;
7078 /*
7079  *              check that range specified is contiguous region
7080  */
7081                 if ((next == vm_map_to_entry(dst_map)) ||
7082                     (next->vme_start != entry->vme_end)) {
7083                         vm_map_unlock(dst_map);
7084                         return(KERN_INVALID_ADDRESS);
7085                 }
7086
7087
7088                 /*
7089                  *      Check for permanent objects in the destination.
7090                  */
7091                 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
7092                     ((!VME_OBJECT(entry)->internal) ||
7093                      (VME_OBJECT(entry)->true_share))) {
7094                         contains_permanent_objects = TRUE;
7095                 }
7096
7097                 entry = next;
7098         }/* for */
7099
7100 start_overwrite:
7101         /*
7102          *      If there are permanent objects in the destination, then
7103          *      the copy cannot be interrupted.
7104          */
7105
7106         if (interruptible && contains_permanent_objects) {
7107                 vm_map_unlock(dst_map);
7108                 return(KERN_FAILURE);   /* XXX */
7109         }
7110
7111         /*
7112          *
7113          *      Make a second pass, overwriting the data
7114          *      At the beginning of each loop iteration,
7115          *      the next entry to be overwritten is "tmp_entry"
7116          *      (initially, the value returned from the lookup above),
7117          *      and the starting address expected in that entry
7118          *      is "base_addr".
7119          */
7120
7121         total_size = copy->size;
7122         if(encountered_sub_map) {
7123                 copy_size = 0;
7124                 /* re-calculate tmp_entry since we've had the map */
7125                 /* unlocked */
7126                 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
7127                         vm_map_unlock(dst_map);
7128                         return(KERN_INVALID_ADDRESS);
7129                 }
7130         } else {
7131                 copy_size = copy->size;
7132         }
7133
7134         base_addr = dst_addr;
7135         while(TRUE) {
7136                 /* deconstruct the copy object and do in parts */
7137                 /* only in sub_map, interruptible case */
7138                 vm_map_entry_t  copy_entry;
7139                 vm_map_entry_t  previous_prev = VM_MAP_ENTRY_NULL;
7140                 vm_map_entry_t  next_copy = VM_MAP_ENTRY_NULL;
7141                 int             nentries;
7142                 int             remaining_entries = 0;
7143                 vm_map_offset_t new_offset = 0;
7144
7145                 for (entry = tmp_entry; copy_size == 0;) {
7146                         vm_map_entry_t  next;
7147
7148                         next = entry->vme_next;
7149
7150                         /* tmp_entry and base address are moved along */
7151                         /* each time we encounter a sub-map.  Otherwise */
7152                         /* entry can outpace tmp_entry, and the copy_size */
7153                         /* may reflect the distance between them */
7154                         /* if the current entry is found to be in transition */
7155                         /* we will start over at the beginning or the last */
7156                         /* encounter of a submap as dictated by base_addr */
7157                         /* we will zero copy_size accordingly. */
7158                         if (entry->in_transition) {
7159                                 /*
7160                                  * Say that we are waiting, and wait for entry.
7161                                  */
7162                                 entry->needs_wakeup = TRUE;
7163                                 vm_map_entry_wait(dst_map, THREAD_UNINT);
7164
7165                                 if(!vm_map_lookup_entry(dst_map, base_addr,
7166                                                         &tmp_entry)) {
7167                                         vm_map_unlock(dst_map);
7168                                         return(KERN_INVALID_ADDRESS);
7169                                 }
7170                                 copy_size = 0;
7171                                 entry = tmp_entry;
7172                                 continue;
7173                         }
7174                         if(entry->is_sub_map) {
7175                                 vm_map_offset_t sub_start;
7176                                 vm_map_offset_t sub_end;
7177                                 vm_map_offset_t local_end;
7178
7179                                 if (entry->needs_copy) {
7180                                         /* if this is a COW submap */
7181                                         /* just back the range with a */
7182                                         /* anonymous entry */
7183                                         if(entry->vme_end < dst_end)
7184                                                 sub_end = entry->vme_end;
7185                                         else
7186                                                 sub_end = dst_end;
7187                                         if(entry->vme_start < base_addr)
7188                                                 sub_start = base_addr;
7189                                         else
7190                                                 sub_start = entry->vme_start;
7191                                         vm_map_clip_end(
7192                                                 dst_map, entry, sub_end);
7193                                         vm_map_clip_start(
7194                                                 dst_map, entry, sub_start);
7195                                         assert(!entry->use_pmap);
7196                                         entry->is_sub_map = FALSE;
7197                                         vm_map_deallocate(
7198                                                 VME_SUBMAP(entry));
7199                                         VME_SUBMAP_SET(entry, NULL);
7200                                         entry->is_shared = FALSE;
7201                                         entry->needs_copy = FALSE;
7202                                         VME_OFFSET_SET(entry, 0);
7203                                         /*
7204                                          * XXX FBDP
7205                                          * We should propagate the protections
7206                                          * of the submap entry here instead
7207                                          * of forcing them to VM_PROT_ALL...
7208                                          * Or better yet, we should inherit
7209                                          * the protection of the copy_entry.
7210                                          */
7211                                         entry->protection = VM_PROT_ALL;
7212                                         entry->max_protection = VM_PROT_ALL;
7213                                         entry->wired_count = 0;
7214                                         entry->user_wired_count = 0;
7215                                         if(entry->inheritance
7216                                            == VM_INHERIT_SHARE)
7217                                                 entry->inheritance = VM_INHERIT_COPY;
7218                                         continue;
7219                                 }
7220                                 /* first take care of any non-sub_map */
7221                                 /* entries to send */
7222                                 if(base_addr < entry->vme_start) {
7223                                         /* stuff to send */
7224                                         copy_size =
7225                                                 entry->vme_start - base_addr;
7226                                         break;
7227                                 }
7228                                 sub_start = VME_OFFSET(entry);
7229
7230                                 if(entry->vme_end < dst_end)
7231                                         sub_end = entry->vme_end;
7232                                 else
7233                                         sub_end = dst_end;
7234                                 sub_end -= entry->vme_start;
7235                                 sub_end += VME_OFFSET(entry);
7236                                 local_end = entry->vme_end;
7237                                 vm_map_unlock(dst_map);
7238                                 copy_size = sub_end - sub_start;
7239
7240                                 /* adjust the copy object */
7241                                 if (total_size > copy_size) {
7242                                         vm_map_size_t   local_size = 0;
7243                                         vm_map_size_t   entry_size;
7244
7245                                         nentries = 1;
7246                                         new_offset = copy->offset;
7247                                         copy_entry = vm_map_copy_first_entry(copy);
7248                                         while(copy_entry !=
7249                                               vm_map_copy_to_entry(copy)){
7250                                                 entry_size = copy_entry->vme_end -
7251                                                         copy_entry->vme_start;
7252                                                 if((local_size < copy_size) &&
7253                                                    ((local_size + entry_size)
7254                                                     >= copy_size)) {
7255                                                         vm_map_copy_clip_end(copy,
7256                                                                              copy_entry,
7257                                                                              copy_entry->vme_start +
7258                                                                              (copy_size - local_size));
7259                                                         entry_size = copy_entry->vme_end -
7260                                                                 copy_entry->vme_start;
7261                                                         local_size += entry_size;
7262                                                         new_offset += entry_size;
7263                                                 }
7264                                                 if(local_size >= copy_size) {
7265                                                         next_copy = copy_entry->vme_next;
7266                                                         copy_entry->vme_next =
7267                                                                 vm_map_copy_to_entry(copy);
7268                                                         previous_prev =
7269                                                                 copy->cpy_hdr.links.prev;
7270                                                         copy->cpy_hdr.links.prev = copy_entry;
7271                                                         copy->size = copy_size;
7272                                                         remaining_entries =
7273                                                                 copy->cpy_hdr.nentries;
7274                                                         remaining_entries -= nentries;
7275                                                         copy->cpy_hdr.nentries = nentries;
7276                                                         break;
7277                                                 } else {
7278                                                         local_size += entry_size;
7279                                                         new_offset += entry_size;
7280                                                         nentries++;
7281                                                 }
7282                                                 copy_entry = copy_entry->vme_next;
7283                                         }
7284                                 }
7285
7286                                 if((entry->use_pmap) && (pmap == NULL)) {
7287                                         kr = vm_map_copy_overwrite_nested(
7288                                                 VME_SUBMAP(entry),
7289                                                 sub_start,
7290                                                 copy,
7291                                                 interruptible,
7292                                                 VME_SUBMAP(entry)->pmap,
7293                                                 TRUE);
7294                                 } else if (pmap != NULL) {
7295                                         kr = vm_map_copy_overwrite_nested(
7296                                                 VME_SUBMAP(entry),
7297                                                 sub_start,
7298                                                 copy,
7299                                                 interruptible, pmap,
7300                                                 TRUE);
7301                                 } else {
7302                                         kr = vm_map_copy_overwrite_nested(
7303                                                 VME_SUBMAP(entry),
7304                                                 sub_start,
7305                                                 copy,
7306                                                 interruptible,
7307                                                 dst_map->pmap,
7308                                                 TRUE);
7309                                 }
7310                                 if(kr != KERN_SUCCESS) {
7311                                         if(next_copy != NULL) {
7312                                                 copy->cpy_hdr.nentries +=
7313                                                         remaining_entries;
7314                                                 copy->cpy_hdr.links.prev->vme_next =
7315                                                         next_copy;
7316                                                 copy->cpy_hdr.links.prev
7317                                                         = previous_prev;
7318                                                 copy->size = total_size;
7319                                         }
7320                                         return kr;
7321                                 }
7322                                 if (dst_end <= local_end) {
7323                                         return(KERN_SUCCESS);
7324                                 }
7325                                 /* otherwise copy no longer exists, it was */
7326                                 /* destroyed after successful copy_overwrite */
7327                                 copy = (vm_map_copy_t)
7328                                         zalloc(vm_map_copy_zone);
7329                                 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
7330                                 vm_map_copy_first_entry(copy) =
7331                                         vm_map_copy_last_entry(copy) =
7332                                         vm_map_copy_to_entry(copy);
7333                                 copy->type = VM_MAP_COPY_ENTRY_LIST;
7334                                 copy->offset = new_offset;
7335
7336                                 /*
7337                                  * XXX FBDP
7338                                  * this does not seem to deal with
7339                                  * the VM map store (R&B tree)
7340                                  */
7341
7342                                 total_size -= copy_size;
7343                                 copy_size = 0;
7344                                 /* put back remainder of copy in container */
7345                                 if(next_copy != NULL) {
7346                                         copy->cpy_hdr.nentries = remaining_entries;
7347                                         copy->cpy_hdr.links.next = next_copy;
7348                                         copy->cpy_hdr.links.prev = previous_prev;
7349                                         copy->size = total_size;
7350                                         next_copy->vme_prev =
7351                                                 vm_map_copy_to_entry(copy);
7352                                         next_copy = NULL;
7353                                 }
7354                                 base_addr = local_end;
7355                                 vm_map_lock(dst_map);
7356                                 if(!vm_map_lookup_entry(dst_map,
7357                                                         local_end, &tmp_entry)) {
7358                                         vm_map_unlock(dst_map);
7359                                         return(KERN_INVALID_ADDRESS);
7360                                 }
7361                                 entry = tmp_entry;
7362                                 continue;
7363                         }
7364                         if (dst_end <= entry->vme_end) {
7365                                 copy_size = dst_end - base_addr;
7366                                 break;
7367                         }
7368
7369                         if ((next == vm_map_to_entry(dst_map)) ||
7370                             (next->vme_start != entry->vme_end)) {
7371                                 vm_map_unlock(dst_map);
7372                                 return(KERN_INVALID_ADDRESS);
7373                         }
7374
7375                         entry = next;
7376                 }/* for */
7377
7378                 next_copy = NULL;
7379                 nentries = 1;
7380
7381                 /* adjust the copy object */
7382                 if (total_size > copy_size) {
7383                         vm_map_size_t   local_size = 0;
7384                         vm_map_size_t   entry_size;
7385
7386                         new_offset = copy->offset;
7387                         copy_entry = vm_map_copy_first_entry(copy);
7388                         while(copy_entry != vm_map_copy_to_entry(copy)) {
7389                                 entry_size = copy_entry->vme_end -
7390                                         copy_entry->vme_start;
7391                                 if((local_size < copy_size) &&
7392                                    ((local_size + entry_size)
7393                                     >= copy_size)) {
7394                                         vm_map_copy_clip_end(copy, copy_entry,
7395                                                              copy_entry->vme_start +
7396                                                              (copy_size - local_size));
7397                                         entry_size = copy_entry->vme_end -
7398                                                 copy_entry->vme_start;
7399                                         local_size += entry_size;
7400                                         new_offset += entry_size;
7401                                 }
7402                                 if(local_size >= copy_size) {
7403                                         next_copy = copy_entry->vme_next;
7404                                         copy_entry->vme_next =
7405                                                 vm_map_copy_to_entry(copy);
7406                                         previous_prev =
7407                                                 copy->cpy_hdr.links.prev;
7408                                         copy->cpy_hdr.links.prev = copy_entry;
7409                                         copy->size = copy_size;
7410                                         remaining_entries =
7411                                                 copy->cpy_hdr.nentries;
7412                                         remaining_entries -= nentries;
7413                                         copy->cpy_hdr.nentries = nentries;
7414                                         break;
7415                                 } else {
7416                                         local_size += entry_size;
7417                                         new_offset += entry_size;
7418                                         nentries++;
7419                                 }
7420                                 copy_entry = copy_entry->vme_next;
7421                         }
7422                 }
7423
7424                 if (aligned) {
7425                         pmap_t  local_pmap;
7426
7427                         if(pmap)
7428                                 local_pmap = pmap;
7429                         else
7430                                 local_pmap = dst_map->pmap;
7431
7432                         if ((kr =  vm_map_copy_overwrite_aligned(
7433                                      dst_map, tmp_entry, copy,
7434                                      base_addr, local_pmap)) != KERN_SUCCESS) {
7435                                 if(next_copy != NULL) {
7436                                         copy->cpy_hdr.nentries +=
7437                                                 remaining_entries;
7438                                         copy->cpy_hdr.links.prev->vme_next =
7439                                                 next_copy;
7440                                         copy->cpy_hdr.links.prev =
7441                                                 previous_prev;
7442                                         copy->size += copy_size;
7443                                 }
7444                                 return kr;
7445                         }
7446                         vm_map_unlock(dst_map);
7447                 } else {
7448                         /*
7449                          * Performance gain:
7450                          *
7451                          * if the copy and dst address are misaligned but the same
7452                          * offset within the page we can copy_not_aligned the
7453                          * misaligned parts and copy aligned the rest.  If they are
7454                          * aligned but len is unaligned we simply need to copy
7455                          * the end bit unaligned.  We'll need to split the misaligned
7456                          * bits of the region in this case !
7457                          */
7458                         /* ALWAYS UNLOCKS THE dst_map MAP */
7459                         kr = vm_map_copy_overwrite_unaligned(
7460                                 dst_map,
7461                                 tmp_entry,
7462                                 copy,
7463                                 base_addr,
7464                                 discard_on_success);
7465                         if (kr != KERN_SUCCESS) {
7466                                 if(next_copy != NULL) {
7467                                         copy->cpy_hdr.nentries +=
7468                                                 remaining_entries;
7469                                         copy->cpy_hdr.links.prev->vme_next =
7470                                                 next_copy;
7471                                         copy->cpy_hdr.links.prev =
7472                                                 previous_prev;
7473                                         copy->size += copy_size;
7474                                 }
7475                                 return kr;
7476                         }
7477                 }
7478                 total_size -= copy_size;
7479                 if(total_size == 0)
7480                         break;
7481                 base_addr += copy_size;
7482                 copy_size = 0;
7483                 copy->offset = new_offset;
7484                 if(next_copy != NULL) {
7485                         copy->cpy_hdr.nentries = remaining_entries;
7486                         copy->cpy_hdr.links.next = next_copy;
7487                         copy->cpy_hdr.links.prev = previous_prev;
7488                         next_copy->vme_prev = vm_map_copy_to_entry(copy);
7489                         copy->size = total_size;
7490                 }
7491                 vm_map_lock(dst_map);
7492                 while(TRUE) {
7493                         if (!vm_map_lookup_entry(dst_map,
7494                                                  base_addr, &tmp_entry)) {
7495                                 vm_map_unlock(dst_map);
7496                                 return(KERN_INVALID_ADDRESS);
7497                         }
7498                         if (tmp_entry->in_transition) {
7499                                 tmp_entry->needs_wakeup = TRUE; /* flag the entry we wait on, not the stale "entry" */
7500                                 vm_map_entry_wait(dst_map, THREAD_UNINT);
7501                         } else {
7502                                 break;
7503                         }
7504                 }
7505                 vm_map_clip_start(dst_map,
7506                                   tmp_entry,
7507                                   vm_map_trunc_page(base_addr,
7508                                                     VM_MAP_PAGE_MASK(dst_map)));
7509
7510                 entry = tmp_entry;
7511         } /* while */
7512
7513         /*
7514          *      Throw away the vm_map_copy object
7515          */
7516         if (discard_on_success)
7517                 vm_map_copy_discard(copy);
7518
7519         return(KERN_SUCCESS);
7520 }/* vm_map_copy_overwrite_nested */
7521
     /*
      *      Routine: vm_map_copy_overwrite
      *
      *      Description:
      *      Overwrite the range of "dst_map" starting at "dst_addr" with
      *      the contents of "copy", using vm_map_copy_overwrite_nested()
      *      to do the actual work.
      *
      *      Implementation:
      *      For a non-interruptible entry-list copy of at least 3 pages
      *      whose source offset and destination address share the same
      *      mis-alignment, the copy is split into up to three pieces: an
      *      unaligned "head" up to the first page boundary, the middle,
      *      and an unaligned "tail".  Each piece is handed to
      *      vm_map_copy_overwrite_nested() separately.  In every other
      *      case, or if the destination range contains a submap, the
      *      whole copy is passed through in one call ("blunt_copy").
      *
      *      On success of the split path all pieces of the copy map are
      *      discarded here.  On failure the head and tail entries are
      *      linked back into "copy" and its offset/size are restored
      *      before the error is returned.
      */
7522 kern_return_t
7523 vm_map_copy_overwrite(
7524         vm_map_t        dst_map,
7525         vm_map_offset_t dst_addr,
7526         vm_map_copy_t   copy,
7527         boolean_t       interruptible)
7528 {
7529         vm_map_size_t   head_size, tail_size;
7530         vm_map_copy_t   head_copy, tail_copy;
7531         vm_map_offset_t head_addr, tail_addr;
7532         vm_map_entry_t  entry;
7533         kern_return_t   kr;
7534
7535         head_size = 0;
7536         tail_size = 0;
7537         head_copy = NULL;
7538         tail_copy = NULL;
7539         head_addr = 0;
7540         tail_addr = 0;
7541
7542         if (interruptible ||
7543             copy == VM_MAP_COPY_NULL ||
7544             copy->type != VM_MAP_COPY_ENTRY_LIST) {
7545                 /*
7546                  * We can't split the "copy" map if we're interruptible
7547                  * or if we don't have a "copy" map...
7548                  */
7549         blunt_copy:
7550                 return vm_map_copy_overwrite_nested(dst_map,
7551                                                     dst_addr,
7552                                                     copy,
7553                                                     interruptible,
7554                                                     (pmap_t) NULL,
7555                                                     TRUE);
7556         }
7557
7558         if (copy->size < 3 * PAGE_SIZE) {
7559                 /*
7560                  * Too small to bother with optimizing...
7561                  */
7562                 goto blunt_copy;
7563         }
7564
7565         if ((dst_addr & VM_MAP_PAGE_MASK(dst_map)) !=
7566             (copy->offset & VM_MAP_PAGE_MASK(dst_map))) {
7567                 /*
7568                  * Incompatible mis-alignment of source and destination...
7569                  */
7570                 goto blunt_copy;
7571         }
7572
7573         /*
7574          * Proper alignment or identical mis-alignment at the beginning.
7575          * Let's try and do a small unaligned copy first (if needed)
7576          * and then an aligned copy for the rest.
7577          */
7578         if (!page_aligned(dst_addr)) {
7579                 head_addr = dst_addr;
7580                 head_size = (VM_MAP_PAGE_SIZE(dst_map) -
7581                              (copy->offset & VM_MAP_PAGE_MASK(dst_map)));
7582         }
7583         if (!page_aligned(copy->offset + copy->size)) {
7584                 /*
7585                  * Mis-alignment at the end.
7586                  * Do an aligned copy up to the last page and
7587                  * then an unaligned copy for the remaining bytes.
7588                  */
7589                 tail_size = ((copy->offset + copy->size) &
7590                              VM_MAP_PAGE_MASK(dst_map));
7591                 tail_addr = dst_addr + copy->size - tail_size;
7592         }
7593
7594         if (head_size + tail_size >= copy->size) {
7595                 /*
7596                  * It's all unaligned, no optimization possible...
                      *
                      * ">=" rather than "==": head_size and tail_size are
                      * derived from the map's page size while the size
                      * threshold above uses PAGE_SIZE, so make sure the
                      * "copy->size -= ..." adjustments below can never
                      * underflow.  Nothing has been split off yet, so
                      * falling back to the blunt copy is always safe here.
7597                  */
7598                 goto blunt_copy;
7599         }
7600
7601         /*
7602          * Can't optimize if there are any submaps in the
7603          * destination due to the way we free the "copy" map
7604          * progressively in vm_map_copy_overwrite_nested()
7605          * in that case.
7606          */
7607         vm_map_lock_read(dst_map);
7608         if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
7609                 vm_map_unlock_read(dst_map);
7610                 goto blunt_copy;
7611         }
             /*
              * "entry" walks the destination map's entry list, so the
              * walk must stop at that map's own list head,
              * vm_map_to_entry(dst_map), not at the copy map's.
              */
7612         for (;
7613              (entry != vm_map_to_entry(dst_map) &&
7614               entry->vme_start < dst_addr + copy->size);
7615              entry = entry->vme_next) {
7616                 if (entry->is_sub_map) {
7617                         vm_map_unlock_read(dst_map);
7618                         goto blunt_copy;
7619                 }
7620         }
7621         vm_map_unlock_read(dst_map);
7622
7623         if (head_size) {
7624                 /*
7625                  * Unaligned copy of the first "head_size" bytes, to reach
7626                  * a page boundary.
7627                  */
7628
7629                 /*
7630                  * Extract "head_copy" out of "copy".
7631                  */
7632                 head_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7633                 head_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
7634                 vm_map_copy_first_entry(head_copy) =
7635                         vm_map_copy_to_entry(head_copy);
7636                 vm_map_copy_last_entry(head_copy) =
7637                         vm_map_copy_to_entry(head_copy);
7638                 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
7639                 head_copy->cpy_hdr.nentries = 0;
7640                 head_copy->cpy_hdr.entries_pageable =
7641                         copy->cpy_hdr.entries_pageable;
7642                 vm_map_store_init(&head_copy->cpy_hdr);
7643
7644                 head_copy->offset = copy->offset;
7645                 head_copy->size = head_size;
7646
7647                 copy->offset += head_size;
7648                 copy->size -= head_size;
7649
                     /*
                      * Clip the first copy entry at the new copy->offset
                      * and move that leading entry over to "head_copy".
                      */
7650                 entry = vm_map_copy_first_entry(copy);
7651                 vm_map_copy_clip_end(copy, entry, copy->offset);
7652                 vm_map_copy_entry_unlink(copy, entry);
7653                 vm_map_copy_entry_link(head_copy,
7654                                        vm_map_copy_to_entry(head_copy),
7655                                        entry);
7656
7657                 /*
7658                  * Do the unaligned copy.
7659                  */
7660                 kr = vm_map_copy_overwrite_nested(dst_map,
7661                                                   head_addr,
7662                                                   head_copy,
7663                                                   interruptible,
7664                                                   (pmap_t) NULL,
7665                                                   FALSE);
7666                 if (kr != KERN_SUCCESS)
7667                         goto done;
7668         }
7669
7670         if (tail_size) {
7671                 /*
7672                  * Extract "tail_copy" out of "copy".
7673                  */
7674                 tail_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7675                 tail_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
7676                 vm_map_copy_first_entry(tail_copy) =
7677                         vm_map_copy_to_entry(tail_copy);
7678                 vm_map_copy_last_entry(tail_copy) =
7679                         vm_map_copy_to_entry(tail_copy);
7680                 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
7681                 tail_copy->cpy_hdr.nentries = 0;
7682                 tail_copy->cpy_hdr.entries_pageable =
7683                         copy->cpy_hdr.entries_pageable;
7684                 vm_map_store_init(&tail_copy->cpy_hdr);
7685
7686                 tail_copy->offset = copy->offset + copy->size - tail_size;
7687                 tail_copy->size = tail_size;
7688
7689                 copy->size -= tail_size;
7690
                     /*
                      * Clip the last copy entry at the start of the tail
                      * and move the resulting last entry to "tail_copy".
                      */
7691                 entry = vm_map_copy_last_entry(copy);
7692                 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
7693                 entry = vm_map_copy_last_entry(copy);
7694                 vm_map_copy_entry_unlink(copy, entry);
7695                 vm_map_copy_entry_link(tail_copy,
7696                                        vm_map_copy_last_entry(tail_copy),
7697                                        entry);
7698         }
7699
7700         /*
7701          * Copy most (or possibly all) of the data.
7702          */
7703         kr = vm_map_copy_overwrite_nested(dst_map,
7704                                           dst_addr + head_size,
7705                                           copy,
7706                                           interruptible,
7707                                           (pmap_t) NULL,
7708                                           FALSE);
7709         if (kr != KERN_SUCCESS) {
7710                 goto done;
7711         }
7712
7713         if (tail_size) {
                     /*
                      * Unaligned copy of the last "tail_size" bytes.
                      */
7714                 kr = vm_map_copy_overwrite_nested(dst_map,
7715                                                   tail_addr,
7716                                                   tail_copy,
7717                                                   interruptible,
7718                                                   (pmap_t) NULL,
7719                                                   FALSE);
7720         }
7721
7722 done:
7723         assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
7724         if (kr == KERN_SUCCESS) {
7725                 /*
7726                  * Discard all the copy maps.
                      * (The nested calls above were made with
                      * discard_on_success == FALSE, so that is our job.)
7727                  */
7728                 if (head_copy) {
7729                         vm_map_copy_discard(head_copy);
7730                         head_copy = NULL;
7731                 }
7732                 vm_map_copy_discard(copy);
7733                 if (tail_copy) {
7734                         vm_map_copy_discard(tail_copy);
7735                         tail_copy = NULL;
7736                 }
7737         } else {
7738                 /*
7739                  * Re-assemble the original copy map.
7740                  */
7741                 if (head_copy) {
7742                         entry = vm_map_copy_first_entry(head_copy);
7743                         vm_map_copy_entry_unlink(head_copy, entry);
7744                         vm_map_copy_entry_link(copy,
7745                                                vm_map_copy_to_entry(copy),
7746                                                entry);
7747                         copy->offset -= head_size;
7748                         copy->size += head_size;
7749                         vm_map_copy_discard(head_copy);
7750                         head_copy = NULL;
7751                 }
7752                 if (tail_copy) {
7753                         entry = vm_map_copy_last_entry(tail_copy);
7754                         vm_map_copy_entry_unlink(tail_copy, entry);
7755                         vm_map_copy_entry_link(copy,
7756                                                vm_map_copy_last_entry(copy),
7757                                                entry);
7758                         copy->size += tail_size;
7759                         vm_map_copy_discard(tail_copy);
7760                         tail_copy = NULL;
7761                 }
7762         }
7763         return kr;
7764 }
7765
7766
7767 /*
7768  *      Routine: vm_map_copy_overwrite_unaligned        [internal use only]
7769  *
7770  *      Description:
7771  *      Physically copy unaligned data
7772  *
7773  *      Implementation:
7774  *      Unaligned parts of pages have to be physically copied.  We use
7775  *      a modified form of vm_fault_copy (which understands non-aligned
7776  *      page offsets and sizes) to do the copy.  We attempt to copy as
7777  *      much memory in one go as possible, however vm_fault_copy copies
7778  *      within 1 memory object so we have to find the smaller of "amount left"
7779  *      "source object data size" and "target object data size".  With
7780  *      unaligned data we don't need to split regions, therefore the source
7781  *      (copy) object should be one map entry, the target range may be split
7782  *      over multiple map entries however.  In any event we are pessimistic
7783  *      about these assumptions.
7784  *
      *      Parameters:
      *      dst_map                 destination map
      *      entry                   dst_map entry containing "start"
      *      copy                    source data (entry-list copy map)
      *      start                   destination address of the first byte
      *      discard_on_success      if TRUE, each source copy entry is
      *                              unlinked and disposed of once consumed
      *
      *      Returns:
      *      KERN_SUCCESS, KERN_INVALID_ADDRESS (hole in the destination or
      *      source exhausted early), KERN_PROTECTION_FAILURE (destination
      *      entry not writable), or the error returned by vm_fault_copy.
      *
7785  *      Assumptions:
7786  *      dst_map is locked on entry (the write lock is downgraded to a
7787  *      read lock below) and is returned unlocked, on success and on error.
7788  */
7789
7790 static kern_return_t
7791 vm_map_copy_overwrite_unaligned(
7792         vm_map_t        dst_map,
7793         vm_map_entry_t  entry,
7794         vm_map_copy_t   copy,
7795         vm_map_offset_t start,
7796         boolean_t       discard_on_success)
7797 {
7798         vm_map_entry_t          copy_entry;
7799         vm_map_entry_t          copy_entry_next;
7800         vm_map_version_t        version;
7801         vm_object_t             dst_object;
7802         vm_object_offset_t      dst_offset;
7803         vm_object_offset_t      src_offset;
7804         vm_object_offset_t      entry_offset;
7805         vm_map_offset_t         entry_end;
7806         vm_map_size_t           src_size,
7807                                 dst_size,
7808                                 copy_size,
7809                                 amount_left;
7810         kern_return_t           kr = KERN_SUCCESS;
7811
7812
7813         copy_entry = vm_map_copy_first_entry(copy);
7814
7815         vm_map_lock_write_to_read(dst_map);
7816
7817         src_offset = copy->offset - vm_object_trunc_page(copy->offset);
7818         amount_left = copy->size;
7819 /*
7820  *      unaligned so we never clipped this entry, we need the offset into
7821  *      the vm_object not just the data.
7822  */
7823         while (amount_left > 0) {
7824
7825                 if (entry == vm_map_to_entry(dst_map)) {
7826                         vm_map_unlock_read(dst_map);
7827                         return KERN_INVALID_ADDRESS;
7828                 }
7829
7830                 /* "start" must be within the current map entry */
7831                 assert ((start>=entry->vme_start) && (start<entry->vme_end));
                     /*
                      *      Check protection again: the map lock is dropped
                      *      around vm_fault_copy() below and "entry" may have
                      *      been looked up afresh, so its protection can no
                      *      longer be taken for granted.  The aligned variant
                      *      performs the same re-check.
                      */
                     if ( ! (entry->protection & VM_PROT_WRITE)) {
                             vm_map_unlock_read(dst_map);
                             return KERN_PROTECTION_FAILURE;
                     }
7832
7833                 dst_offset = start - entry->vme_start;
7834
7835                 dst_size = entry->vme_end - start;
7836
7837                 src_size = copy_entry->vme_end -
7838                         (copy_entry->vme_start + src_offset);
7839
7840                 if (dst_size < src_size) {
7841 /*
7842  *                      we can only copy dst_size bytes before
7843  *                      we have to get the next destination entry
7844  */
7845                         copy_size = dst_size;
7846                 } else {
7847 /*
7848  *                      we can only copy src_size bytes before
7849  *                      we have to get the next source copy entry
7850  */
7851                         copy_size = src_size;
7852                 }
7853
7854                 if (copy_size > amount_left) {
7855                         copy_size = amount_left;
7856                 }
7857 /*
7858  *              Entry needs copy, create a shadow object for
7859  *              Copy on write region.
7860  */
7861                 if (entry->needs_copy &&
7862                     ((entry->protection & VM_PROT_WRITE) != 0))
7863                 {
                             /*
                              * A failed upgrade has dropped the lock:
                              * re-take it for reading and look "start"
                              * up again.
                              */
7864                         if (vm_map_lock_read_to_write(dst_map)) {
7865                                 vm_map_lock_read(dst_map);
7866                                 goto RetryLookup;
7867                         }
7868                         VME_OBJECT_SHADOW(entry,
7869                                           (vm_map_size_t)(entry->vme_end
7870                                                           - entry->vme_start));
7871                         entry->needs_copy = FALSE;
7872                         vm_map_lock_write_to_read(dst_map);
7873                 }
7874                 dst_object = VME_OBJECT(entry);
7875 /*
7876  *              unlike with the virtual (aligned) copy we're going
7877  *              to fault on it therefore we need a target object.
7878  */
7879                 if (dst_object == VM_OBJECT_NULL) {
7880                         if (vm_map_lock_read_to_write(dst_map)) {
7881                                 vm_map_lock_read(dst_map);
7882                                 goto RetryLookup;
7883                         }
7884                         dst_object = vm_object_allocate((vm_map_size_t)
7885                                                         entry->vme_end - entry->vme_start);
7886                         VME_OBJECT(entry) = dst_object;
7887                         VME_OFFSET_SET(entry, 0);
7888                         assert(entry->use_pmap);
7889                         vm_map_lock_write_to_read(dst_map);
7890                 }
7891 /*
7892  *              Take an object reference and unlock map. The "entry" may
7893  *              disappear or change when the map is unlocked.
7894  */
7895                 vm_object_reference(dst_object);
7896                 version.main_timestamp = dst_map->timestamp;
7897                 entry_offset = VME_OFFSET(entry);
7898                 entry_end = entry->vme_end;
7899                 vm_map_unlock_read(dst_map);
7900 /*
7901  *              Copy as much as possible in one pass
7902  */
7903                 kr = vm_fault_copy(
7904                         VME_OBJECT(copy_entry),
7905                         VME_OFFSET(copy_entry) + src_offset,
7906                         &copy_size,
7907                         dst_object,
7908                         entry_offset + dst_offset,
7909                         dst_map,
7910                         &version,
7911                         THREAD_UNINT );
7912
7913                 start += copy_size;
7914                 src_offset += copy_size;
7915                 amount_left -= copy_size;
7916 /*
7917  *              Release the object reference
7918  */
7919                 vm_object_deallocate(dst_object);
7920 /*
7921  *              If a hard error occurred, return it now
7922  *              (the map is already unlocked at this point)
7923  */
7924                 if (kr != KERN_SUCCESS)
7925                         return kr;
7926
7927                 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
7928                     || amount_left == 0)
7929                 {
7930 /*
7931  *                      all done with this copy entry, dispose.
7932  */
7933                         copy_entry_next = copy_entry->vme_next;
7934
7935                         if (discard_on_success) {
7936                                 vm_map_copy_entry_unlink(copy, copy_entry);
7937                                 assert(!copy_entry->is_sub_map);
7938                                 vm_object_deallocate(VME_OBJECT(copy_entry));
7939                                 vm_map_copy_entry_dispose(copy, copy_entry);
7940                         }
7941
7942                         if (copy_entry_next == vm_map_copy_to_entry(copy) &&
7943                             amount_left) {
7944 /*
7945  *                              not finished copying but run out of source
7946  */
7947                                 return KERN_INVALID_ADDRESS;
7948                         }
7949
7950                         copy_entry = copy_entry_next;
7951
7952                         src_offset = 0;
7953                 }
7954
7955                 if (amount_left == 0)
7956                         return KERN_SUCCESS;
7957
7958                 vm_map_lock_read(dst_map);
7959                 if (version.main_timestamp == dst_map->timestamp) {
7960                         if (start == entry_end) {
7961 /*
7962  *                              destination region is split.  Use the version
7963  *                              information to avoid a lookup in the normal
7964  *                              case.
7965  */
7966                                 entry = entry->vme_next;
7967 /*
7968  *                              should be contiguous. Fail if we encounter
7969  *                              a hole in the destination.
7970  */
7971                                 if (start != entry->vme_start) {
7972                                         vm_map_unlock_read(dst_map);
7973                                         return KERN_INVALID_ADDRESS ;
7974                                 }
7975                         }
7976                 } else {
7977 /*
7978  *                      Map version check failed.
7979  *                      we must lookup the entry because somebody
7980  *                      might have changed the map behind our backs.
7981  */
7982                 RetryLookup:
7983                         if (!vm_map_lookup_entry(dst_map, start, &entry))
7984                         {
7985                                 vm_map_unlock_read(dst_map);
7986                                 return KERN_INVALID_ADDRESS ;
7987                         }
7988                 }
7989         }/* while */
7990
             /*
              * Only reached when there was nothing to copy (copy->size
              * was 0): the loop body never ran, so the read lock taken
              * at the top is still held.  Drop it so that the map is
              * returned unlocked, like on every other return path.
              */
             vm_map_unlock_read(dst_map);
7991         return KERN_SUCCESS;
7992 }/* vm_map_copy_overwrite_unaligned */
7992
7993 /*
7994  *      Routine: vm_map_copy_overwrite_aligned  [internal use only]
7995  *
7996  *      Description:
7997  *      Does all the vm_trickery possible for whole pages.
7998  *
7999  *      Implementation:
8000  *
8001  *      If there are no permanent objects in the destination,
8002  *      and the source and destination map entry zones match,
8003  *      and the destination map entry is not shared,
8004  *      then the map entries can be deleted and replaced
8005  *      with those from the copy.  The following code is the
8006  *      basic idea of what to do, but there are lots of annoying
8007  *      little details about getting protection and inheritance
8008  *      right.  Should add protection, inheritance, and sharing checks
8009  *      to the above pass and make sure that no wiring is involved.
8010  */
8011
     /*
      * Event counters bumped by vm_map_copy_overwrite_aligned() each time
      * it abandons the entry-substitution path and jumps to "slow_copy".
      */
     /* source shadow chain ended at an object that is not "internal"
      * while overwriting a malloc-tagged destination entry */
8012 int vm_map_copy_overwrite_aligned_src_not_internal = 0;
     /* same walk ended at a "true_share" object or one whose
      * copy_strategy is not MEMORY_OBJECT_COPY_SYMMETRIC */
8013 int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
     /* source object is at least __TRADEOFF1_OBJ_SIZE while the piece
      * being copied is at most __TRADEOFF1_COPY_SIZE */
8014 int vm_map_copy_overwrite_aligned_src_large = 0;
8015
8016 static kern_return_t
8017 vm_map_copy_overwrite_aligned(
8018         vm_map_t        dst_map,
8019         vm_map_entry_t  tmp_entry,
8020         vm_map_copy_t   copy,
8021         vm_map_offset_t start,
8022         __unused pmap_t pmap)
8023 {
8024         vm_object_t     object;
8025         vm_map_entry_t  copy_entry;
8026         vm_map_size_t   copy_size;
8027         vm_map_size_t   size;
8028         vm_map_entry_t  entry;
8029
8030         while ((copy_entry = vm_map_copy_first_entry(copy))
8031                != vm_map_copy_to_entry(copy))
8032         {
8033                 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
8034
8035                 entry = tmp_entry;
8036                 if (entry->is_sub_map) {
8037                         /* unnested when clipped earlier */
8038                         assert(!entry->use_pmap);
8039                 }
8040                 if (entry == vm_map_to_entry(dst_map)) {
8041                         vm_map_unlock(dst_map);
8042                         return KERN_INVALID_ADDRESS;
8043                 }
8044                 size = (entry->vme_end - entry->vme_start);
8045                 /*
8046                  *      Make sure that no holes popped up in the
8047                  *      address map, and that the protection is
8048                  *      still valid, in case the map was unlocked
8049                  *      earlier.
8050                  */
8051
8052                 if ((entry->vme_start != start) || ((entry->is_sub_map)
8053                                                     && !entry->needs_copy)) {
8054                         vm_map_unlock(dst_map);
8055                         return(KERN_INVALID_ADDRESS);
8056                 }
8057                 assert(entry != vm_map_to_entry(dst_map));
8058
8059                 /*
8060                  *      Check protection again
8061                  */
8062
8063                 if ( ! (entry->protection & VM_PROT_WRITE)) {
8064                         vm_map_unlock(dst_map);
8065                         return(KERN_PROTECTION_FAILURE);
8066                 }
8067
8068                 /*
8069                  *      Adjust to source size first
8070                  */
8071
8072                 if (copy_size < size) {
8073                         if (entry->map_aligned &&
8074                             !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
8075                                                  VM_MAP_PAGE_MASK(dst_map))) {
8076                                 /* no longer map-aligned */
8077                                 entry->map_aligned = FALSE;
8078                         }
8079                         vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
8080                         size = copy_size;
8081                 }
8082
8083                 /*
8084                  *      Adjust to destination size
8085                  */
8086
8087                 if (size < copy_size) {
8088                         vm_map_copy_clip_end(copy, copy_entry,
8089                                              copy_entry->vme_start + size);
8090                         copy_size = size;
8091                 }
8092
8093                 assert((entry->vme_end - entry->vme_start) == size);
8094                 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
8095                 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
8096
8097                 /*
8098                  *      If the destination contains temporary unshared memory,
8099                  *      we can perform the copy by throwing it away and
8100                  *      installing the source data.
8101                  */
8102
8103                 object = VME_OBJECT(entry);
8104                 if ((!entry->is_shared &&
8105                      ((object == VM_OBJECT_NULL) ||
8106                       (object->internal && !object->true_share))) ||
8107                     entry->needs_copy) {
8108                         vm_object_t     old_object = VME_OBJECT(entry);
8109                         vm_object_offset_t      old_offset = VME_OFFSET(entry);
8110                         vm_object_offset_t      offset;
8111
8112                         /*
8113                          * Ensure that the source and destination aren't
8114                          * identical
8115                          */
8116                         if (old_object == VME_OBJECT(copy_entry) &&
8117                             old_offset == VME_OFFSET(copy_entry)) {
8118                                 vm_map_copy_entry_unlink(copy, copy_entry);
8119                                 vm_map_copy_entry_dispose(copy, copy_entry);
8120
8121                                 if (old_object != VM_OBJECT_NULL)
8122                                         vm_object_deallocate(old_object);
8123
8124                                 start = tmp_entry->vme_end;
8125                                 tmp_entry = tmp_entry->vme_next;
8126                                 continue;
8127                         }
8128
8129 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
8130 #define __TRADEOFF1_COPY_SIZE (128 * 1024)      /* 128 KB */
8131                         if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
8132                             VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
8133                             copy_size <= __TRADEOFF1_COPY_SIZE) {
8134                                 /*
8135                                  * Virtual vs. Physical copy tradeoff #1.
8136                                  *
8137                                  * Copying only a few pages out of a large
8138                                  * object:  do a physical copy instead of
8139                                  * a virtual copy, to avoid possibly keeping
8140                                  * the entire large object alive because of
8141                                  * those few copy-on-write pages.
8142                                  */
8143                                 vm_map_copy_overwrite_aligned_src_large++;
8144                                 goto slow_copy;
8145                         }
8146
8147                         if ((dst_map->pmap != kernel_pmap) &&
8148                             (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
8149                             (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_LARGE_REUSED)) {
8150                                 vm_object_t new_object, new_shadow;
8151
8152                                 /*
8153                                  * We're about to map something over a mapping
8154                                  * established by malloc()...
8155                                  */
8156                                 new_object = VME_OBJECT(copy_entry);
8157                                 if (new_object != VM_OBJECT_NULL) {
8158                                         vm_object_lock_shared(new_object);
8159                                 }
8160                                 while (new_object != VM_OBJECT_NULL &&
8161                                        !new_object->true_share &&
8162                                        new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
8163                                        new_object->internal) {
8164                                         new_shadow = new_object->shadow;
8165                                         if (new_shadow == VM_OBJECT_NULL) {
8166                                                 break;
8167                                         }
8168                                         vm_object_lock_shared(new_shadow);
8169                                         vm_object_unlock(new_object);
8170                                         new_object = new_shadow;
8171                                 }
8172                                 if (new_object != VM_OBJECT_NULL) {
8173                                         if (!new_object->internal) {
8174                                                 /*
8175                                                  * The new mapping is backed
8176                                                  * by an external object.  We
8177                                                  * don't want malloc'ed memory
8178                                                  * to be replaced with such a
8179                                                  * non-anonymous mapping, so
8180                                                  * let's go off the optimized
8181                                                  * path...
8182                                                  */
8183                                                 vm_map_copy_overwrite_aligned_src_not_internal++;
8184                                                 vm_object_unlock(new_object);
8185                                                 goto slow_copy;
8186                                         }
8187                                         if (new_object->true_share ||
8188                                             new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
8189                                                 /*
8190                                                  * Same if there's a "true_share"
8191                                                  * object in the shadow chain, or
8192                                                  * an object with a non-default
8193                                                  * (SYMMETRIC) copy strategy.
8194                                                  */
8195                                                 vm_map_copy_overwrite_aligned_src_not_symmetric++;
8196                                                 vm_object_unlock(new_object);
8197                                                 goto slow_copy;
8198                                         }
8199                                         vm_object_unlock(new_object);
8200                                 }
8201                                 /*
8202                                  * The new mapping is still backed by
8203                                  * anonymous (internal) memory, so it's
8204                                  * OK to substitute it for the original
8205                                  * malloc() mapping.
8206                                  */
8207                         }
8208
8209                         if (old_object != VM_OBJECT_NULL) {
8210                                 if(entry->is_sub_map) {
8211                                         if(entry->use_pmap) {
8212 #ifndef NO_NESTED_PMAP
8213                                                 pmap_unnest(dst_map->pmap,
8214                                                             (addr64_t)entry->vme_start,
8215                                                             entry->vme_end - entry->vme_start);
8216 #endif  /* NO_NESTED_PMAP */
8217                                                 if(dst_map->mapped_in_other_pmaps) {
8218                                                         /* clean up parent */
8219                                                         /* map/maps */
8220                                                         vm_map_submap_pmap_clean(
8221                                                                 dst_map, entry->vme_start,
8222                                                                 entry->vme_end,
8223                                                                 VME_SUBMAP(entry),
8224                                                                 VME_OFFSET(entry));
8225                                                 }
8226                                         } else {
8227                                                 vm_map_submap_pmap_clean(
8228                                                         dst_map, entry->vme_start,
8229                                                         entry->vme_end,
8230                                                         VME_SUBMAP(entry),
8231                                                         VME_OFFSET(entry));
8232                                         }
8233                                         vm_map_deallocate(VME_SUBMAP(entry));
8234                                 } else {
8235                                         if(dst_map->mapped_in_other_pmaps) {
8236                                                 vm_object_pmap_protect_options(
8237                                                         VME_OBJECT(entry),
8238                                                         VME_OFFSET(entry),
8239                                                         entry->vme_end
8240                                                         - entry->vme_start,
8241                                                         PMAP_NULL,
8242                                                         entry->vme_start,
8243                                                         VM_PROT_NONE,
8244                                                         PMAP_OPTIONS_REMOVE);
8245                                         } else {
8246                                                 pmap_remove_options(
8247                                                         dst_map->pmap,
8248                                                         (addr64_t)(entry->vme_start),
8249                                                         (addr64_t)(entry->vme_end),
8250                                                         PMAP_OPTIONS_REMOVE);
8251                                         }
8252                                         vm_object_deallocate(old_object);
8253                                 }
8254                         }
8255
8256                         entry->is_sub_map = FALSE;
8257                         VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
8258                         object = VME_OBJECT(entry);
8259                         entry->needs_copy = copy_entry->needs_copy;
8260                         entry->wired_count = 0;
8261                         entry->user_wired_count = 0;
8262                         offset = VME_OFFSET(copy_entry);
8263                         VME_OFFSET_SET(entry, offset);
8264
8265                         vm_map_copy_entry_unlink(copy, copy_entry);
8266                         vm_map_copy_entry_dispose(copy, copy_entry);
8267
8268                         /*
8269                          * we could try to push pages into the pmap at this point, BUT
8270                          * this optimization only saved on average 2 us per page if ALL
8271                          * the pages in the source were currently mapped
8272                          * and ALL the pages in the dest were touched, if there were fewer
8273                          * than 2/3 of the pages touched, this optimization actually cost more cycles
8274                          * it also puts a lot of pressure on the pmap layer w/r to mapping structures
8275                          */
8276
8277                         /*
8278                          *      Set up for the next iteration.  The map
8279                          *      has not been unlocked, so the next
8280                          *      address should be at the end of this
8281                          *      entry, and the next map entry should be
8282                          *      the one following it.
8283                          */
8284
8285                         start = tmp_entry->vme_end;
8286                         tmp_entry = tmp_entry->vme_next;
8287                 } else {
8288                         vm_map_version_t        version;
8289                         vm_object_t             dst_object;
8290                         vm_object_offset_t      dst_offset;
8291                         kern_return_t           r;
8292
8293                 slow_copy:
8294                         if (entry->needs_copy) {
8295                                 VME_OBJECT_SHADOW(entry,
8296                                                   (entry->vme_end -
8297                                                    entry->vme_start));
8298                                 entry->needs_copy = FALSE;
8299                         }
8300
8301                         dst_object = VME_OBJECT(entry);
8302                         dst_offset = VME_OFFSET(entry);
8303
8304                         /*
8305                          *      Take an object reference, and record
8306                          *      the map version information so that the
8307                          *      map can be safely unlocked.
8308                          */
8309
8310                         if (dst_object == VM_OBJECT_NULL) {
8311                                 /*
8312                                  * We would usually have just taken the
8313                                  * optimized path above if the destination
8314                                  * object has not been allocated yet.  But we
8315                                  * now disable that optimization if the copy
8316                                  * entry's object is not backed by anonymous
8317                                  * memory to avoid replacing malloc'ed
8318                                  * (i.e. re-usable) anonymous memory with a
8319                                  * not-so-anonymous mapping.
8320                                  * So we have to handle this case here and
8321                                  * allocate a new VM object for this map entry.
8322                                  */
8323                                 dst_object = vm_object_allocate(
8324                                         entry->vme_end - entry->vme_start);
8325                                 dst_offset = 0;
8326                                 VME_OBJECT_SET(entry, dst_object);
8327                                 VME_OFFSET_SET(entry, dst_offset);
8328                                 assert(entry->use_pmap);
8329
8330                         }
8331
8332                         vm_object_reference(dst_object);
8333
8334                         /* account for unlock bumping up timestamp */
8335                         version.main_timestamp = dst_map->timestamp + 1;
8336
8337                         vm_map_unlock(dst_map);
8338
8339                         /*
8340                          *      Copy as much as possible in one pass
8341                          */
8342
8343                         copy_size = size;
8344                         r = vm_fault_copy(
8345                                 VME_OBJECT(copy_entry),
8346                                 VME_OFFSET(copy_entry),
8347                                 &copy_size,
8348                                 dst_object,
8349                                 dst_offset,
8350                                 dst_map,
8351                                 &version,
8352                                 THREAD_UNINT );
8353
8354                         /*
8355                          *      Release the object reference
8356                          */
8357
8358                         vm_object_deallocate(dst_object);
8359
8360                         /*
8361                          *      If a hard error occurred, return it now
8362                          */
8363
8364                         if (r != KERN_SUCCESS)
8365                                 return(r);
8366
8367                         if (copy_size != 0) {
8368                                 /*
8369                                  *      Dispose of the copied region
8370                                  */
8371
8372                                 vm_map_copy_clip_end(copy, copy_entry,
8373                                                      copy_entry->vme_start + copy_size);
8374                                 vm_map_copy_entry_unlink(copy, copy_entry);
8375                                 vm_object_deallocate(VME_OBJECT(copy_entry));
8376                                 vm_map_copy_entry_dispose(copy, copy_entry);
8377                         }
8378
8379                         /*
8380                          *      Pick up in the destination map where we left off.
8381                          *
8382                          *      Use the version information to avoid a lookup
8383                          *      in the normal case.
8384                          */
8385
8386                         start += copy_size;
8387                         vm_map_lock(dst_map);
8388                         if (version.main_timestamp == dst_map->timestamp &&
8389                             copy_size != 0) {
8390                                 /* We can safely use saved tmp_entry value */
8391
8392                                 if (tmp_entry->map_aligned &&
8393                                     !VM_MAP_PAGE_ALIGNED(
8394                                             start,
8395                                             VM_MAP_PAGE_MASK(dst_map))) {
8396                                         /* no longer map-aligned */
8397                                         tmp_entry->map_aligned = FALSE;
8398                                 }
8399                                 vm_map_clip_end(dst_map, tmp_entry, start);
8400                                 tmp_entry = tmp_entry->vme_next;
8401                         } else {
8402                                 /* Must do lookup of tmp_entry */
8403
8404                                 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
8405                                         vm_map_unlock(dst_map);
8406                                         return(KERN_INVALID_ADDRESS);
8407                                 }
8408                                 if (tmp_entry->map_aligned &&
8409                                     !VM_MAP_PAGE_ALIGNED(
8410                                             start,
8411                                             VM_MAP_PAGE_MASK(dst_map))) {
8412                                         /* no longer map-aligned */
8413                                         tmp_entry->map_aligned = FALSE;
8414                                 }
8415                                 vm_map_clip_start(dst_map, tmp_entry, start);
8416                         }
8417                 }
8418         }/* while */
8419
8420         return(KERN_SUCCESS);
8421 }/* vm_map_copy_overwrite_aligned */
8422
8423 /*
8424  *      Routine: vm_map_copyin_kernel_buffer [internal use only]
8425  *
8426  *      Description:
8427  *              Copy in data to a kernel buffer from space in the
8428  *              source map. The original space may be optionally
8429  *              deallocated.
8430  *
8431  *              If successful, returns a new copy object.
8432  */
8433 static kern_return_t
8434 vm_map_copyin_kernel_buffer(
8435         vm_map_t        src_map,
8436         vm_map_offset_t src_addr,
8437         vm_map_size_t   len,
8438         boolean_t       src_destroy,
8439         vm_map_copy_t   *copy_result)
8440 {
8441         kern_return_t kr;
8442         vm_map_copy_t copy;
8443         vm_size_t kalloc_size;
8444
8445         if (len > msg_ool_size_small)
8446                 return KERN_INVALID_ARGUMENT;
8447
8448         kalloc_size = (vm_size_t)(cpy_kdata_hdr_sz + len);
8449
8450         copy = (vm_map_copy_t)kalloc(kalloc_size);
8451         if (copy == VM_MAP_COPY_NULL)
8452                 return KERN_RESOURCE_SHORTAGE;
8453         copy->type = VM_MAP_COPY_KERNEL_BUFFER;
8454         copy->size = len;
8455         copy->offset = 0;
8456
8457         kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
8458         if (kr != KERN_SUCCESS) {
8459                 kfree(copy, kalloc_size);
8460                 return kr;
8461         }
8462         if (src_destroy) {
8463                 (void) vm_map_remove(
8464                         src_map,
8465                         vm_map_trunc_page(src_addr,
8466                                           VM_MAP_PAGE_MASK(src_map)),
8467                         vm_map_round_page(src_addr + len,
8468                                           VM_MAP_PAGE_MASK(src_map)),
8469                         (VM_MAP_REMOVE_INTERRUPTIBLE |
8470                          VM_MAP_REMOVE_WAIT_FOR_KWIRE |
8471                          (src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : 0));
8472         }
8473         *copy_result = copy;
8474         return KERN_SUCCESS;
8475 }
8476
8477 /*
8478  *      Routine: vm_map_copyout_kernel_buffer   [internal use only]
8479  *
8480  *      Description:
8481  *              Copy out data from a kernel buffer into space in the
8482  *              destination map. The space may be otpionally dynamically
8483  *              allocated.
8484  *
8485  *              If successful, consumes the copy object.
8486  *              Otherwise, the caller is responsible for it.
8487  */
8488 static int vm_map_copyout_kernel_buffer_failures = 0;
8489 static kern_return_t
8490 vm_map_copyout_kernel_buffer(
8491         vm_map_t                map,
8492         vm_map_address_t        *addr,  /* IN/OUT */
8493         vm_map_copy_t           copy,
8494         boolean_t               overwrite,
8495         boolean_t               consume_on_success)
8496 {
8497         kern_return_t kr = KERN_SUCCESS;
8498         thread_t thread = current_thread();
8499
8500         /*
8501          * check for corrupted vm_map_copy structure
8502          */
8503         if (copy->size > msg_ool_size_small || copy->offset)
8504                 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
8505                       (long long)copy->size, (long long)copy->offset);
8506
8507         if (!overwrite) {
8508
8509                 /*
8510                  * Allocate space in the target map for the data
8511                  */
8512                 *addr = 0;
8513                 kr = vm_map_enter(map,
8514                                   addr,
8515                                   vm_map_round_page(copy->size,
8516                                                     VM_MAP_PAGE_MASK(map)),
8517                                   (vm_map_offset_t) 0,
8518                                   VM_FLAGS_ANYWHERE,
8519                                   VM_OBJECT_NULL,
8520                                   (vm_object_offset_t) 0,
8521                                   FALSE,
8522                                   VM_PROT_DEFAULT,
8523                                   VM_PROT_ALL,
8524                                   VM_INHERIT_DEFAULT);
8525                 if (kr != KERN_SUCCESS)
8526                         return kr;
8527         }
8528
8529         /*
8530          * Copyout the data from the kernel buffer to the target map.
8531          */
8532         if (thread->map == map) {
8533
8534                 /*
8535                  * If the target map is the current map, just do
8536                  * the copy.
8537                  */
8538                 assert((vm_size_t) copy->size == copy->size);
8539                 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
8540                         kr = KERN_INVALID_ADDRESS;
8541                 }
8542         }
8543         else {
8544                 vm_map_t oldmap;
8545
8546                 /*
8547                  * If the target map is another map, assume the
8548                  * target's address space identity for the duration
8549                  * of the copy.
8550                  */
8551                 vm_map_reference(map);
8552                 oldmap = vm_map_switch(map);
8553
8554                 assert((vm_size_t) copy->size == copy->size);
8555                 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
8556                         vm_map_copyout_kernel_buffer_failures++;
8557                         kr = KERN_INVALID_ADDRESS;
8558                 }
8559
8560                 (void) vm_map_switch(oldmap);
8561                 vm_map_deallocate(map);
8562         }
8563
8564         if (kr != KERN_SUCCESS) {
8565                 /* the copy failed, clean up */
8566                 if (!overwrite) {
8567                         /*
8568                          * Deallocate the space we allocated in the target map.
8569                          */
8570                         (void) vm_map_remove(
8571                                 map,
8572                                 vm_map_trunc_page(*addr,
8573                                                   VM_MAP_PAGE_MASK(map)),
8574                                 vm_map_round_page((*addr +
8575                                                    vm_map_round_page(copy->size,
8576                                                                      VM_MAP_PAGE_MASK(map))),
8577                                                   VM_MAP_PAGE_MASK(map)),
8578                                 VM_MAP_NO_FLAGS);
8579                         *addr = 0;
8580                 }
8581         } else {
8582                 /* copy was successful, dicard the copy structure */
8583                 if (consume_on_success) {
8584                         kfree(copy, copy->size + cpy_kdata_hdr_sz);
8585                 }
8586         }
8587
8588         return kr;
8589 }
8590
/*
 *      Macro:          vm_map_copy_insert
 *
 *      Description:
 *              Hand the copy chain "copy" to vm_map_store_copy_insert()
 *              so that it is linked into "map" after the entry "where",
 *              then release the copy header.
 *      Side effects:
 *              "copy" is freed back to vm_map_copy_zone and must not be
 *              used afterwards.
 *      Warning:
 *              Arguments may be evaluated more than once ("copy"
 *              appears twice in the expansion); do not pass
 *              expressions with side effects.
 */
#define vm_map_copy_insert(map, where, copy)                            \
MACRO_BEGIN                                                             \
        vm_map_store_copy_insert((map), (where), (copy));               \
        zfree(vm_map_copy_zone, (copy));                                \
MACRO_END
8607
8608 void
8609 vm_map_copy_remap(
8610         vm_map_t        map,
8611         vm_map_entry_t  where,
8612         vm_map_copy_t   copy,
8613         vm_map_offset_t adjustment,
8614         vm_prot_t       cur_prot,
8615         vm_prot_t       max_prot,
8616         vm_inherit_t    inheritance)
8617 {
8618         vm_map_entry_t  copy_entry, new_entry;
8619
8620         for (copy_entry = vm_map_copy_first_entry(copy);
8621              copy_entry != vm_map_copy_to_entry(copy);
8622              copy_entry = copy_entry->vme_next) {
8623                 /* get a new VM map entry for the map */
8624                 new_entry = vm_map_entry_create(map,
8625                                                 !map->hdr.entries_pageable);
8626                 /* copy the "copy entry" to the new entry */
8627                 vm_map_entry_copy(new_entry, copy_entry);
8628                 /* adjust "start" and "end" */
8629                 new_entry->vme_start += adjustment;
8630                 new_entry->vme_end += adjustment;
8631                 /* clear some attributes */
8632                 new_entry->inheritance = inheritance;
8633                 new_entry->protection = cur_prot;
8634                 new_entry->max_protection = max_prot;
8635                 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
8636                 /* take an extra reference on the entry's "object" */
8637                 if (new_entry->is_sub_map) {
8638                         assert(!new_entry->use_pmap); /* not nested */
8639                         vm_map_lock(VME_SUBMAP(new_entry));
8640                         vm_map_reference(VME_SUBMAP(new_entry));
8641                         vm_map_unlock(VME_SUBMAP(new_entry));
8642                 } else {
8643                         vm_object_reference(VME_OBJECT(new_entry));
8644                 }
8645                 /* insert the new entry in the map */
8646                 vm_map_store_entry_link(map, where, new_entry);
8647                 /* continue inserting the "copy entries" after the new entry */
8648                 where = new_entry;
8649         }
8650 }
8651
8652
8653 boolean_t
8654 vm_map_copy_validate_size(
8655         vm_map_t                dst_map,
8656         vm_map_copy_t           copy,
8657         vm_map_size_t           size)
8658 {
8659         if (copy == VM_MAP_COPY_NULL)
8660                 return FALSE;
8661         switch (copy->type) {
8662         case VM_MAP_COPY_OBJECT:
8663         case VM_MAP_COPY_KERNEL_BUFFER:
8664                 if (size == copy->size)
8665                         return TRUE;
8666                 break;
8667         case VM_MAP_COPY_ENTRY_LIST:
8668                 /*
8669                  * potential page-size rounding prevents us from exactly
8670                  * validating this flavor of vm_map_copy, but we can at least
8671                  * assert that it's within a range.
8672                  */
8673                 if (copy->size >= size &&
8674                     copy->size <= vm_map_round_page(size,
8675                                                     VM_MAP_PAGE_MASK(dst_map)))
8676                         return TRUE;
8677                 break;
8678         default:
8679                 break;
8680         }
8681         return FALSE;
8682 }
8683
8684
8685 /*
8686  *      Routine:        vm_map_copyout
8687  *
8688  *      Description:
8689  *              Copy out a copy chain ("copy") into newly-allocated
8690  *              space in the destination map.
8691  *
8692  *              If successful, consumes the copy object.
8693  *              Otherwise, the caller is responsible for it.
8694  */
8695
8696 kern_return_t
8697 vm_map_copyout(
8698         vm_map_t                dst_map,
8699         vm_map_address_t        *dst_addr,      /* OUT */
8700         vm_map_copy_t           copy)
8701 {
8702         return vm_map_copyout_internal(dst_map, dst_addr, copy,
8703                                        TRUE, /* consume_on_success */
8704                                        VM_PROT_DEFAULT,
8705                                        VM_PROT_ALL,
8706                                        VM_INHERIT_DEFAULT);
8707 }
8708
8709 kern_return_t
8710 vm_map_copyout_internal(
8711         vm_map_t                dst_map,
8712         vm_map_address_t        *dst_addr,      /* OUT */
8713         vm_map_copy_t           copy,
8714         boolean_t               consume_on_success,
8715         vm_prot_t               cur_protection,
8716         vm_prot_t               max_protection,
8717         vm_inherit_t            inheritance)
8718 {
8719         vm_map_size_t           size;
8720         vm_map_size_t           adjustment;
8721         vm_map_offset_t         start;
8722         vm_object_offset_t      vm_copy_start;
8723         vm_map_entry_t          last;
8724         vm_map_entry_t          entry;
8725         vm_map_entry_t          hole_entry;
8726
8727         /*
8728          *      Check for null copy object.
8729          */
8730
8731         if (copy == VM_MAP_COPY_NULL) {
8732                 *dst_addr = 0;
8733                 return(KERN_SUCCESS);
8734         }
8735
8736         /*
8737          *      Check for special copy object, created
8738          *      by vm_map_copyin_object.
8739          */
8740
8741         if (copy->type == VM_MAP_COPY_OBJECT) {
8742                 vm_object_t             object = copy->cpy_object;
8743                 kern_return_t           kr;
8744                 vm_object_offset_t      offset;
8745
8746                 offset = vm_object_trunc_page(copy->offset);
8747                 size = vm_map_round_page((copy->size +
8748                                           (vm_map_size_t)(copy->offset -
8749                                                           offset)),
8750                                          VM_MAP_PAGE_MASK(dst_map));
8751                 *dst_addr = 0;
8752                 kr = vm_map_enter(dst_map, dst_addr, size,
8753                                   (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
8754                                   object, offset, FALSE,
8755                                   VM_PROT_DEFAULT, VM_PROT_ALL,
8756                                   VM_INHERIT_DEFAULT);
8757                 if (kr != KERN_SUCCESS)
8758                         return(kr);
8759                 /* Account for non-pagealigned copy object */
8760                 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
8761                 if (consume_on_success)
8762                         zfree(vm_map_copy_zone, copy);
8763                 return(KERN_SUCCESS);
8764         }
8765
8766         /*
8767          *      Check for special kernel buffer allocated
8768          *      by new_ipc_kmsg_copyin.
8769          */
8770
8771         if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
8772                 return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
8773                                                     copy, FALSE,
8774                                                     consume_on_success);
8775         }
8776
8777
8778         /*
8779          *      Find space for the data
8780          */
8781
8782         vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
8783                                           VM_MAP_COPY_PAGE_MASK(copy));
8784         size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size,
8785                                  VM_MAP_COPY_PAGE_MASK(copy))
8786                 - vm_copy_start;
8787
8788
8789 StartAgain: ;
8790
8791         vm_map_lock(dst_map);
8792         if( dst_map->disable_vmentry_reuse == TRUE) {
8793                 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
8794                 last = entry;
8795         } else {
8796                 if (dst_map->holelistenabled) {
8797                         hole_entry = (vm_map_entry_t)dst_map->holes_list;
8798
8799                         if (hole_entry == NULL) {
8800                                 /*
8801                                  * No more space in the map?
8802                                  */
8803                                 vm_map_unlock(dst_map);
8804                                 return(KERN_NO_SPACE);
8805                         }
8806
8807                         last = hole_entry;
8808                         start = last->vme_start;
8809                 } else {
8810                         assert(first_free_is_valid(dst_map));
8811                         start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
8812                         vm_map_min(dst_map) : last->vme_end;
8813                 }
8814                 start = vm_map_round_page(start,
8815                                           VM_MAP_PAGE_MASK(dst_map));
8816         }
8817
8818         while (TRUE) {
8819                 vm_map_entry_t  next = last->vme_next;
8820                 vm_map_offset_t end = start + size;
8821
8822                 if ((end > dst_map->max_offset) || (end < start)) {
8823                         if (dst_map->wait_for_space) {
8824                                 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
8825                                         assert_wait((event_t) dst_map,
8826                                                     THREAD_INTERRUPTIBLE);
8827                                         vm_map_unlock(dst_map);
8828                                         thread_block(THREAD_CONTINUE_NULL);
8829                                         goto StartAgain;
8830                                 }
8831                         }
8832                         vm_map_unlock(dst_map);
8833                         return(KERN_NO_SPACE);
8834                 }
8835
8836                 if (dst_map->holelistenabled) {
8837                         if (last->vme_end >= end)
8838                                 break;
8839                 } else {
8840                         /*
8841                          *      If there are no more entries, we must win.
8842                          *
8843                          *      OR
8844                          *
8845                          *      If there is another entry, it must be
8846                          *      after the end of the potential new region.
8847                          */
8848
8849                         if (next == vm_map_to_entry(dst_map))
8850                                 break;
8851
8852                         if (next->vme_start >= end)
8853                                 break;
8854                 }
8855
8856                 last = next;
8857
8858                 if (dst_map->holelistenabled) {
8859                         if (last == (vm_map_entry_t) dst_map->holes_list) {
8860                                 /*
8861                                  * Wrapped around
8862                                  */
8863                                 vm_map_unlock(dst_map);
8864                                 return(KERN_NO_SPACE);
8865                         }
8866                         start = last->vme_start;
8867                 } else {
8868                         start = last->vme_end;
8869                 }
8870                 start = vm_map_round_page(start,
8871                                           VM_MAP_PAGE_MASK(dst_map));
8872         }
8873
8874         if (dst_map->holelistenabled) {
8875                 if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) {
8876                         panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last, (unsigned long long)last->vme_start);
8877                 }
8878         }
8879
8880
8881         adjustment = start - vm_copy_start;
8882         if (! consume_on_success) {
8883                 /*
8884                  * We're not allowed to consume "copy", so we'll have to
8885                  * copy its map entries into the destination map below.
8886                  * No need to re-allocate map entries from the correct
8887                  * (pageable or not) zone, since we'll get new map entries
8888                  * during the transfer.
8889                  * We'll also adjust the map entries's "start" and "end"
8890                  * during the transfer, to keep "copy"'s entries consistent
8891                  * with its "offset".
8892                  */
8893                 goto after_adjustments;
8894         }
8895
8896         /*
8897          *      Since we're going to just drop the map
8898          *      entries from the copy into the destination
8899          *      map, they must come from the same pool.
8900          */
8901
8902         if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
8903                 /*
8904                  * Mismatches occur when dealing with the default
8905                  * pager.
8906                  */
8907                 zone_t          old_zone;
8908                 vm_map_entry_t  next, new;
8909
8910                 /*
8911                  * Find the zone that the copies were allocated from
8912                  */
8913
8914                 entry = vm_map_copy_first_entry(copy);
8915
8916                 /*
8917                  * Reinitialize the copy so that vm_map_copy_entry_link
8918                  * will work.
8919                  */
8920                 vm_map_store_copy_reset(copy, entry);
8921                 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
8922
8923                 /*
8924                  * Copy each entry.
8925                  */
8926                 while (entry != vm_map_copy_to_entry(copy)) {
8927                         new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
8928                         vm_map_entry_copy_full(new, entry);
8929                         assert(!new->iokit_acct);
8930                         if (new->is_sub_map) {
8931                                 /* clr address space specifics */
8932                                 new->use_pmap = FALSE;
8933                         }
8934                         vm_map_copy_entry_link(copy,
8935                                                vm_map_copy_last_entry(copy),
8936                                                new);
8937                         next = entry->vme_next;
8938                         old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
8939                         zfree(old_zone, entry);
8940                         entry = next;
8941                 }
8942         }
8943
8944         /*
8945          *      Adjust the addresses in the copy chain, and
8946          *      reset the region attributes.
8947          */
8948
8949         for (entry = vm_map_copy_first_entry(copy);
8950              entry != vm_map_copy_to_entry(copy);
8951              entry = entry->vme_next) {
8952                 if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
8953                         /*
8954                          * We're injecting this copy entry into a map that
8955                          * has the standard page alignment, so clear
8956                          * "map_aligned" (which might have been inherited
8957                          * from the original map entry).
8958                          */
8959                         entry->map_aligned = FALSE;
8960                 }
8961
8962                 entry->vme_start += adjustment;
8963                 entry->vme_end += adjustment;
8964
8965                 if (entry->map_aligned) {
8966                         assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
8967                                                    VM_MAP_PAGE_MASK(dst_map)));
8968                         assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
8969                                                    VM_MAP_PAGE_MASK(dst_map)));
8970                 }
8971
8972                 entry->inheritance = VM_INHERIT_DEFAULT;
8973                 entry->protection = VM_PROT_DEFAULT;
8974                 entry->max_protection = VM_PROT_ALL;
8975                 entry->behavior = VM_BEHAVIOR_DEFAULT;
8976
8977                 /*
8978                  * If the entry is now wired,
8979                  * map the pages into the destination map.
8980                  */
8981                 if (entry->wired_count != 0) {
8982                         register vm_map_offset_t va;
8983                         vm_object_offset_t       offset;
8984                         register vm_object_t object;
8985                         vm_prot_t prot;
8986                         int     type_of_fault;
8987
8988                         object = VME_OBJECT(entry);
8989                         offset = VME_OFFSET(entry);
8990                         va = entry->vme_start;
8991
8992                         pmap_pageable(dst_map->pmap,
8993                                       entry->vme_start,
8994                                       entry->vme_end,
8995                                       TRUE);
8996
8997                         while (va < entry->vme_end) {
8998                                 register vm_page_t      m;
8999
9000                                 /*
9001                                  * Look up the page in the object.
9002                                  * Assert that the page will be found in the
9003                                  * top object:
9004                                  * either
9005                                  *      the object was newly created by
9006                                  *      vm_object_copy_slowly, and has
9007                                  *      copies of all of the pages from
9008                                  *      the source object
9009                                  * or
9010                                  *      the object was moved from the old
9011                                  *      map entry; because the old map
9012                                  *      entry was wired, all of the pages
9013                                  *      were in the top-level object.
9014                                  *      (XXX not true if we wire pages for
9015                                  *       reading)
9016                                  */
9017                                 vm_object_lock(object);
9018
9019                                 m = vm_page_lookup(object, offset);
9020                                 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
9021                                     m->absent)
9022                                         panic("vm_map_copyout: wiring %p", m);
9023
9024                                 /*
9025                                  * ENCRYPTED SWAP:
9026                                  * The page is assumed to be wired here, so it
9027                                  * shouldn't be encrypted.  Otherwise, we
9028                                  * couldn't enter it in the page table, since
9029                                  * we don't want the user to see the encrypted
9030                                  * data.
9031                                  */
9032                                 ASSERT_PAGE_DECRYPTED(m);
9033
9034                                 prot = entry->protection;
9035
9036                                 if (override_nx(dst_map, VME_ALIAS(entry)) &&
9037                                     prot)
9038                                         prot |= VM_PROT_EXECUTE;
9039
9040                                 type_of_fault = DBG_CACHE_HIT_FAULT;
9041
9042                                 vm_fault_enter(m, dst_map->pmap, va, prot, prot,
9043                                                VM_PAGE_WIRED(m), FALSE, FALSE,
9044                                                FALSE, VME_ALIAS(entry),
9045                                                ((entry->iokit_acct ||
9046                                                  (!entry->is_sub_map &&
9047                                                   !entry->use_pmap))
9048                                                 ? PMAP_OPTIONS_ALT_ACCT
9049                                                 : 0),
9050                                                NULL, &type_of_fault);
9051
9052                                 vm_object_unlock(object);
9053
9054                                 offset += PAGE_SIZE_64;
9055                                 va += PAGE_SIZE;
9056                         }
9057                 }
9058         }
9059
9060 after_adjustments:
9061
9062         /*
9063          *      Correct the page alignment for the result
9064          */
9065
9066         *dst_addr = start + (copy->offset - vm_copy_start);
9067
9068         /*
9069          *      Update the hints and the map size
9070          */
9071
9072         if (consume_on_success) {
9073                 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
9074         } else {
9075                 SAVE_HINT_MAP_WRITE(dst_map, last);
9076         }
9077
9078         dst_map->size += size;
9079
9080         /*
9081          *      Link in the copy
9082          */
9083
9084         if (consume_on_success) {
9085                 vm_map_copy_insert(dst_map, last, copy);
9086         } else {
9087                 vm_map_copy_remap(dst_map, last, copy, adjustment,
9088                                   cur_protection, max_protection,
9089                                   inheritance);
9090         }
9091
9092         vm_map_unlock(dst_map);
9093
9094         /*
9095          * XXX  If wiring_required, call vm_map_pageable
9096          */
9097
9098         return(KERN_SUCCESS);
9099 }
9100
9101 /*
9102  *      Routine:        vm_map_copyin
9103  *
9104  *      Description:
9105  *              Thin front end to vm_map_copyin_common, to which it
9106  *              passes FALSE for both "src_volatile" and "use_maxprot".
9107  *              Exported via Unsupported.exports.
9108  */
9109 #undef vm_map_copyin
9110
9111 kern_return_t
9112 vm_map_copyin(
9113         vm_map_t         src_map,
9114         vm_map_address_t src_addr,
9115         vm_map_size_t    len,
9116         boolean_t        src_destroy,
9117         vm_map_copy_t    *copy_result)  /* OUT */
9118 {
9119         return vm_map_copyin_common(src_map, src_addr, len, src_destroy,
9120                 /* src_volatile */ FALSE, copy_result, /* use_maxprot */ FALSE);
9121 }
9122
9123 /*
9124  *      Routine:        vm_map_copyin_common
9125  *
9126  *      Description:
9127  *              Copy the specified region (src_addr, len) from the
9128  *              source address space (src_map), possibly removing
9129  *              the region from the source address space (src_destroy).
9130  *
9131  *      Returns:
9132  *              A vm_map_copy_t object (copy_result), suitable for
9133  *              insertion into another address space (using vm_map_copyout)
9134  *              or copying over another address space region (using
9135  *              vm_map_copy_overwrite).  If the copy is unused, it
9136  *              should be destroyed (using vm_map_copy_discard).
9137  *
9138  *      In/out conditions:
9139  *              The source map should not be locked on entry.
9140  */
9141
9142 typedef struct submap_map {             /* one saved level of submap descent in vm_map_copyin_internal */
9143         vm_map_t        parent_map;     /* map being left when stepping into a submap */
9144         vm_map_offset_t base_start;     /* src_start as it was in parent_map */
9145         vm_map_offset_t base_end;       /* src_end as it was in parent_map */
9146         vm_map_size_t   base_len;       /* submap length, capped at (src_end - src_start) */
9147         struct submap_map *next;        /* previously saved level; NULL ends the list */
9148 } submap_map_t;
9149
9150 kern_return_t
9151 vm_map_copyin_common(
9152         vm_map_t                src_map,
9153         vm_map_address_t        src_addr,
9154         vm_map_size_t           len,
9155         boolean_t               src_destroy,
9156         __unused boolean_t      src_volatile,
9157         vm_map_copy_t           *copy_result,   /* OUT */
9158         boolean_t               use_maxprot)
9159 {
9160         /*
9161          * Fold the boolean arguments into the flag word taken by
9162          * vm_map_copyin_internal.  "src_volatile" is marked __unused
9163          * and contributes nothing to the flags.
9164          */
9165         int     copyin_flags = 0;
9166
9167         copyin_flags |= src_destroy ? VM_MAP_COPYIN_SRC_DESTROY : 0;
9168         copyin_flags |= use_maxprot ? VM_MAP_COPYIN_USE_MAXPROT : 0;
9169
9170         /* Hand off to the flag-based routine and return its result. */
9171         return vm_map_copyin_internal(src_map, src_addr, len,
9172                                       copyin_flags,
9173                                       copy_result);
9174 }
9175 kern_return_t
9176 vm_map_copyin_internal(
9177         vm_map_t        src_map,
9178         vm_map_address_t src_addr,
9179         vm_map_size_t   len,
9180         int             flags,
9181         vm_map_copy_t   *copy_result)   /* OUT */
9182 {
9183         vm_map_entry_t  tmp_entry;      /* Result of last map lookup --
9184                                          * in multi-level lookup, this
9185                                          * entry contains the actual
9186                                          * vm_object/offset.
9187                                          */
9188         vm_map_entry_t  new_entry = VM_MAP_ENTRY_NULL;  /* Map entry for copy */
9189
9190         vm_map_offset_t src_start;      /* Start of current entry --
9191                                          * where copy is taking place now
9192                                          */
9193         vm_map_offset_t src_end;        /* End of entire region to be
9194                                          * copied */
9195         vm_map_offset_t src_base;
9196         vm_map_t        base_map = src_map;
9197         boolean_t       map_share=FALSE;
9198         submap_map_t    *parent_maps = NULL;
9199
9200         vm_map_copy_t   copy;           /* Resulting copy */
9201         vm_map_address_t copy_addr;
9202         vm_map_size_t   copy_size;
9203         boolean_t       src_destroy;
9204         boolean_t       use_maxprot;
9205
9206         if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
9207                 return KERN_INVALID_ARGUMENT;
9208         }
9209
9210         src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
9211         use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
9212
9213         /*
9214          *      Check for copies of zero bytes.
9215          */
9216
9217         if (len == 0) {
9218                 *copy_result = VM_MAP_COPY_NULL;
9219                 return(KERN_SUCCESS);
9220         }
9221
9222         /*
9223          *      Check that the end address doesn't overflow
9224          */
9225         src_end = src_addr + len;
9226         if (src_end < src_addr)
9227                 return KERN_INVALID_ADDRESS;
9228
9229         /*
9230          * If the copy is sufficiently small, use a kernel buffer instead
9231          * of making a virtual copy.  The theory being that the cost of
9232          * setting up VM (and taking C-O-W faults) dominates the copy costs
9233          * for small regions.
9234          */
9235         if ((len < msg_ool_size_small) &&
9236             !use_maxprot &&
9237             !(flags & VM_MAP_COPYIN_ENTRY_LIST))
9238                 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
9239                                                    src_destroy, copy_result);
9240
9241         /*
9242          *      Compute (page aligned) start and end of region
9243          */
9244         src_start = vm_map_trunc_page(src_addr,
9245                                       VM_MAP_PAGE_MASK(src_map));
9246         src_end = vm_map_round_page(src_end,
9247                                     VM_MAP_PAGE_MASK(src_map));
9248
9249         XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);
9250
9251         /*
9252          *      Allocate a header element for the list.
9253          *
9254          *      Use the start and end in the header to
9255          *      remember the endpoints prior to rounding.
9256          */
9257
9258         copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
9259         copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
9260         vm_map_copy_first_entry(copy) =
9261                 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
9262         copy->type = VM_MAP_COPY_ENTRY_LIST;
9263         copy->cpy_hdr.nentries = 0;
9264         copy->cpy_hdr.entries_pageable = TRUE;
9265 #if 00
9266         copy->cpy_hdr.page_shift = src_map->hdr.page_shift;
9267 #else
9268         /*
9269          * The copy entries can be broken down for a variety of reasons,
9270          * so we can't guarantee that they will remain map-aligned...
9271          * Will need to adjust the first copy_entry's "vme_start" and
9272          * the last copy_entry's "vme_end" to be rounded to PAGE_MASK
9273          * rather than the original map's alignment.
9274          */
9275         copy->cpy_hdr.page_shift = PAGE_SHIFT;
9276 #endif
9277
9278         vm_map_store_init( &(copy->cpy_hdr) );
9279
9280         copy->offset = src_addr;
9281         copy->size = len;
9282
9283         new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
9284
9285 #define RETURN(x)                                               \
9286         MACRO_BEGIN                                             \
9287         vm_map_unlock(src_map);                                 \
9288         if(src_map != base_map)                                 \
9289                 vm_map_deallocate(src_map);                     \
9290         if (new_entry != VM_MAP_ENTRY_NULL)                     \
9291                 vm_map_copy_entry_dispose(copy,new_entry);      \
9292         vm_map_copy_discard(copy);                              \
9293         {                                                       \
9294                 submap_map_t    *_ptr;                          \
9295                                                                 \
9296                 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
9297                         parent_maps=parent_maps->next;          \
9298                         if (_ptr->parent_map != base_map)       \
9299                                 vm_map_deallocate(_ptr->parent_map);    \
9300                         kfree(_ptr, sizeof(submap_map_t));      \
9301                 }                                               \
9302         }                                                       \
9303         MACRO_RETURN(x);                                        \
9304         MACRO_END
9305
9306         /*
9307          *      Find the beginning of the region.
9308          */
9309
9310         vm_map_lock(src_map);
9311
9312         /*
9313          * Lookup the original "src_addr" rather than the truncated
9314          * "src_start", in case "src_start" falls in a non-map-aligned
9315          * map entry *before* the map entry that contains "src_addr"...
9316          */
9317         if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry))
9318                 RETURN(KERN_INVALID_ADDRESS);
9319         if(!tmp_entry->is_sub_map) {
9320                 /*
9321                  * ... but clip to the map-rounded "src_start" rather than
9322                  * "src_addr" to preserve map-alignment.  We'll adjust the
9323                  * first copy entry at the end, if needed.
9324                  */
9325                 vm_map_clip_start(src_map, tmp_entry, src_start);
9326         }
9327         if (src_start < tmp_entry->vme_start) {
9328                 /*
9329                  * Move "src_start" up to the start of the
9330                  * first map entry to copy.
9331                  */
9332                 src_start = tmp_entry->vme_start;
9333         }
9334         /* set for later submap fix-up */
9335         copy_addr = src_start;
9336
9337         /*
9338          *      Go through entries until we get to the end.
9339          */
9340
9341         while (TRUE) {
9342                 register
9343                 vm_map_entry_t  src_entry = tmp_entry;  /* Top-level entry */
9344                 vm_map_size_t   src_size;               /* Size of source
9345                                                          * map entry (in both
9346                                                          * maps)
9347                                                          */
9348
9349                 register
9350                 vm_object_t             src_object;     /* Object to copy */
9351                 vm_object_offset_t      src_offset;
9352
9353                 boolean_t       src_needs_copy;         /* Should source map
9354                                                          * be made read-only
9355                                                          * for copy-on-write?
9356                                                          */
9357
9358                 boolean_t       new_entry_needs_copy;   /* Will new entry be COW? */
9359
9360                 boolean_t       was_wired;              /* Was source wired? */
9361                 vm_map_version_t version;               /* Version before locks
9362                                                          * dropped to make copy
9363                                                          */
9364                 kern_return_t   result;                 /* Return value from
9365                                                          * copy_strategically.
9366                                                          */
9367                 while(tmp_entry->is_sub_map) {
9368                         vm_map_size_t submap_len;
9369                         submap_map_t *ptr;
9370
9371                         ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
9372                         ptr->next = parent_maps;
9373                         parent_maps = ptr;
9374                         ptr->parent_map = src_map;
9375                         ptr->base_start = src_start;
9376                         ptr->base_end = src_end;
9377                         submap_len = tmp_entry->vme_end - src_start;
9378                         if(submap_len > (src_end-src_start))
9379                                 submap_len = src_end-src_start;
9380                         ptr->base_len = submap_len;
9381
9382                         src_start -= tmp_entry->vme_start;
9383                         src_start += VME_OFFSET(tmp_entry);
9384                         src_end = src_start + submap_len;
9385                         src_map = VME_SUBMAP(tmp_entry);
9386                         vm_map_lock(src_map);
9387                         /* keep an outstanding reference for all maps in */
9388                         /* the parents tree except the base map */
9389                         vm_map_reference(src_map);
9390                         vm_map_unlock(ptr->parent_map);
9391                         if (!vm_map_lookup_entry(
9392                                     src_map, src_start, &tmp_entry))
9393                                 RETURN(KERN_INVALID_ADDRESS);
9394                         map_share = TRUE;
9395                         if(!tmp_entry->is_sub_map)
9396                                 vm_map_clip_start(src_map, tmp_entry, src_start);
9397                         src_entry = tmp_entry;
9398                 }
9399                 /* we are now in the lowest level submap... */
9400
9401                 if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
9402                     (VME_OBJECT(tmp_entry)->phys_contiguous)) {
9403                         /* This is not, supported for now.In future */
9404                         /* we will need to detect the phys_contig   */
9405                         /* condition and then upgrade copy_slowly   */
9406                         /* to do physical copy from the device mem  */
9407                         /* based object. We can piggy-back off of   */
9408                         /* the was wired boolean to set-up the      */
9409                         /* proper handling */
9410                         RETURN(KERN_PROTECTION_FAILURE);
9411                 }
9412                 /*
9413                  *      Create a new address map entry to hold the result.
9414                  *      Fill in the fields from the appropriate source entries.
9415                  *      We must unlock the source map to do this if we need
9416                  *      to allocate a map entry.
9417                  */
9418                 if (new_entry == VM_MAP_ENTRY_NULL) {
9419                         version.main_timestamp = src_map->timestamp;
9420                         vm_map_unlock(src_map);
9421
9422                         new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
9423
9424                         vm_map_lock(src_map);
9425                         if ((version.main_timestamp + 1) != src_map->timestamp) {
9426                                 if (!vm_map_lookup_entry(src_map, src_start,
9427                                                          &tmp_entry)) {
9428                                         RETURN(KERN_INVALID_ADDRESS);
9429                                 }
9430                                 if (!tmp_entry->is_sub_map)
9431                                         vm_map_clip_start(src_map, tmp_entry, src_start);
9432                                 continue; /* restart w/ new tmp_entry */
9433                         }
9434                 }
9435
9436                 /*
9437                  *      Verify that the region can be read.
9438                  */
9439                 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
9440                      !use_maxprot) ||
9441                     (src_entry->max_protection & VM_PROT_READ) == 0)
9442                         RETURN(KERN_PROTECTION_FAILURE);
9443
9444                 /*
9445                  *      Clip against the endpoints of the entire region.
9446                  */
9447
9448                 vm_map_clip_end(src_map, src_entry, src_end);
9449
9450                 src_size = src_entry->vme_end - src_start;
9451                 src_object = VME_OBJECT(src_entry);
9452                 src_offset = VME_OFFSET(src_entry);
9453                 was_wired = (src_entry->wired_count != 0);
9454
9455                 vm_map_entry_copy(new_entry, src_entry);
9456                 if (new_entry->is_sub_map) {
9457                         /* clr address space specifics */
9458                         new_entry->use_pmap = FALSE;
9459                 }
9460
9461                 /*
9462                  *      Attempt non-blocking copy-on-write optimizations.
9463                  */
9464
9465                 if (src_destroy &&
9466                     (src_object == VM_OBJECT_NULL ||
9467                      (src_object->internal && !src_object->true_share
9468                       && !map_share))) {
9469                         /*
9470                          * If we are destroying the source, and the object
9471                          * is internal, we can move the object reference
9472                          * from the source to the copy.  The copy is
9473                          * copy-on-write only if the source is.
9474                          * We make another reference to the object, because
9475                          * destroying the source entry will deallocate it.
9476                          */
9477                         vm_object_reference(src_object);
9478
9479                         /*
9480                          * Copy is always unwired.  vm_map_copy_entry
9481                          * set its wired count to zero.
9482                          */
9483
9484                         goto CopySuccessful;
9485                 }
9486
9487
9488         RestartCopy:
9489                 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
9490                     src_object, new_entry, VME_OBJECT(new_entry),
9491                     was_wired, 0);
9492                 if ((src_object == VM_OBJECT_NULL ||
9493                      (!was_wired && !map_share && !tmp_entry->is_shared)) &&
9494                     vm_object_copy_quickly(
9495                             &VME_OBJECT(new_entry),
9496                             src_offset,
9497                             src_size,
9498                             &src_needs_copy,
9499                             &new_entry_needs_copy)) {
9500
9501                         new_entry->needs_copy = new_entry_needs_copy;
9502
9503                         /*
9504                          *      Handle copy-on-write obligations
9505                          */
9506
9507                         if (src_needs_copy && !tmp_entry->needs_copy) {
9508                                 vm_prot_t prot;
9509
9510                                 prot = src_entry->protection & ~VM_PROT_WRITE;
9511
9512                                 if (override_nx(src_map, VME_ALIAS(src_entry))
9513                                     && prot)
9514                                         prot |= VM_PROT_EXECUTE;
9515
9516                                 vm_object_pmap_protect(
9517                                         src_object,
9518                                         src_offset,
9519                                         src_size,
9520                                         (src_entry->is_shared ?
9521                                          PMAP_NULL
9522                                          : src_map->pmap),
9523                                         src_entry->vme_start,
9524                                         prot);
9525
9526                                 assert(tmp_entry->wired_count == 0);
9527                                 tmp_entry->needs_copy = TRUE;
9528                         }
9529
9530                         /*
9531                          *      The map has never been unlocked, so it's safe
9532                          *      to move to the next entry rather than doing
9533                          *      another lookup.
9534                          */
9535
9536                         goto CopySuccessful;
9537                 }
9538
9539                 /*
9540                  *      Take an object reference, so that we may
9541                  *      release the map lock(s).
9542                  */
9543
9544                 assert(src_object != VM_OBJECT_NULL);
9545                 vm_object_reference(src_object);
9546
9547                 /*
9548                  *      Record the timestamp for later verification.
9549                  *      Unlock the map.
9550                  */
9551
9552                 version.main_timestamp = src_map->timestamp;
9553                 vm_map_unlock(src_map); /* Increments timestamp once! */
9554
9555                 /*
9556                  *      Perform the copy
9557                  */
9558
9559                 if (was_wired) {
9560                 CopySlowly:
9561                         vm_object_lock(src_object);
9562                         result = vm_object_copy_slowly(
9563                                 src_object,
9564                                 src_offset,
9565                                 src_size,
9566                                 THREAD_UNINT,
9567                                 &VME_OBJECT(new_entry));
9568                         VME_OFFSET_SET(new_entry, 0);
9569                         new_entry->needs_copy = FALSE;
9570
9571                 }
9572                 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
9573                          (tmp_entry->is_shared  || map_share)) {
9574                         vm_object_t new_object;
9575
9576                         vm_object_lock_shared(src_object);
9577                         new_object = vm_object_copy_delayed(
9578                                 src_object,
9579                                 src_offset,
9580                                 src_size,
9581                                 TRUE);
9582                         if (new_object == VM_OBJECT_NULL)
9583                                 goto CopySlowly;
9584
9585                         VME_OBJECT_SET(new_entry, new_object);
9586                         assert(new_entry->wired_count == 0);
9587                         new_entry->needs_copy = TRUE;
9588                         assert(!new_entry->iokit_acct);
9589                         assert(new_object->purgable == VM_PURGABLE_DENY);
9590                         new_entry->use_pmap = TRUE;
9591                         result = KERN_SUCCESS;
9592
9593                 } else {
9594                         vm_object_offset_t new_offset;
9595                         new_offset = VME_OFFSET(new_entry);
9596                         result = vm_object_copy_strategically(src_object,
9597                                                               src_offset,
9598                                                               src_size,
9599                                                               &VME_OBJECT(new_entry),
9600                                                               &new_offset,
9601                                                               &new_entry_needs_copy);
9602                         if (new_offset != VME_OFFSET(new_entry)) {
9603                                 VME_OFFSET_SET(new_entry, new_offset);
9604                         }
9605
9606                         new_entry->needs_copy = new_entry_needs_copy;
9607                 }
9608
9609                 if (result != KERN_SUCCESS &&
9610                     result != KERN_MEMORY_RESTART_COPY) {
9611                         vm_map_lock(src_map);
9612                         RETURN(result);
9613                 }
9614
9615                 /*
9616                  *      Throw away the extra reference
9617                  */
9618
9619                 vm_object_deallocate(src_object);
9620
9621                 /*
9622                  *      Verify that the map has not substantially
9623                  *      changed while the copy was being made.
9624                  */
9625
9626                 vm_map_lock(src_map);
9627
9628                 if ((version.main_timestamp + 1) == src_map->timestamp)
9629                         goto VerificationSuccessful;
9630
9631                 /*
9632                  *      Simple version comparison failed.
9633                  *
9634                  *      Retry the lookup and verify that the
9635                  *      same object/offset are still present.
9636                  *
9637                  *      [Note: a memory manager that colludes with
9638                  *      the calling task can detect that we have
9639                  *      cheated.  While the map was unlocked, the
9640                  *      mapping could have been changed and restored.]
9641                  */
9642
9643                 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
9644                         if (result != KERN_MEMORY_RESTART_COPY) {
9645                                 vm_object_deallocate(VME_OBJECT(new_entry));
9646                                 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
9647                                 assert(!new_entry->iokit_acct);
9648                                 new_entry->use_pmap = TRUE;
9649                         }
9650                         RETURN(KERN_INVALID_ADDRESS);
9651                 }
9652
9653                 src_entry = tmp_entry;
9654                 vm_map_clip_start(src_map, src_entry, src_start);
9655
9656                 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
9657                      !use_maxprot) ||
9658                     ((src_entry->max_protection & VM_PROT_READ) == 0))
9659                         goto VerificationFailed;
9660
9661                 if (src_entry->vme_end < new_entry->vme_end) {
9662                         assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
9663                                                    VM_MAP_COPY_PAGE_MASK(copy)));
9664                         new_entry->vme_end = src_entry->vme_end;
9665                         src_size = new_entry->vme_end - src_start;
9666                 }
9667
9668                 if ((VME_OBJECT(src_entry) != src_object) ||
9669                     (VME_OFFSET(src_entry) != src_offset) ) {
9670
9671                         /*
9672                          *      Verification failed.
9673                          *
9674                          *      Start over with this top-level entry.
9675                          */
9676
9677                 VerificationFailed: ;
9678
9679                         vm_object_deallocate(VME_OBJECT(new_entry));
9680                         tmp_entry = src_entry;
9681                         continue;
9682                 }
9683
9684                 /*
9685                  *      Verification succeeded.
9686                  */
9687
9688         VerificationSuccessful: ;
9689
9690                 if (result == KERN_MEMORY_RESTART_COPY)
9691                         goto RestartCopy;
9692
9693                 /*
9694                  *      Copy succeeded.
9695                  */
9696
9697         CopySuccessful: ;
9698
9699                 /*
9700                  *      Link in the new copy entry.
9701                  */
9702
9703                 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
9704                                        new_entry);
9705
9706                 /*
9707                  *      Determine whether the entire region
9708                  *      has been copied.
9709                  */
9710                 src_base = src_start;
9711                 src_start = new_entry->vme_end;
9712                 new_entry = VM_MAP_ENTRY_NULL;
9713                 while ((src_start >= src_end) && (src_end != 0)) {
9714                         submap_map_t    *ptr;
9715
9716                         if (src_map == base_map) {
9717                                 /* back to the top */
9718                                 break;
9719                         }
9720
9721                         ptr = parent_maps;
9722                         assert(ptr != NULL);
9723                         parent_maps = parent_maps->next;
9724
9725                         /* fix up the damage we did in that submap */
9726                         vm_map_simplify_range(src_map,
9727                                               src_base,
9728                                               src_end);
9729
9730                         vm_map_unlock(src_map);
9731                         vm_map_deallocate(src_map);
9732                         vm_map_lock(ptr->parent_map);
9733                         src_map = ptr->parent_map;
9734                         src_base = ptr->base_start;
9735                         src_start = ptr->base_start + ptr->base_len;
9736                         src_end = ptr->base_end;
9737                         if (!vm_map_lookup_entry(src_map,
9738                                                  src_start,
9739                                                  &tmp_entry) &&
9740                             (src_end > src_start)) {
9741                                 RETURN(KERN_INVALID_ADDRESS);
9742                         }
9743                         kfree(ptr, sizeof(submap_map_t));
9744                         if (parent_maps == NULL)
9745                                 map_share = FALSE;
9746                         src_entry = tmp_entry->vme_prev;
9747                 }
9748
9749                 if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
9750                     (src_start >= src_addr + len) &&
9751                     (src_addr + len != 0)) {
9752                         /*
9753                          * Stop copying now, even though we haven't reached
9754                          * "src_end".  We'll adjust the end of the last copy
9755                          * entry at the end, if needed.
9756                          *
9757                          * If src_map's aligment is different from the
9758                          * system's page-alignment, there could be
9759                          * extra non-map-aligned map entries between
9760                          * the original (non-rounded) "src_addr + len"
9761                          * and the rounded "src_end".
9762                          * We do not want to copy those map entries since
9763                          * they're not part of the copied range.
9764                          */
9765                         break;
9766                 }
9767
9768                 if ((src_start >= src_end) && (src_end != 0))
9769                         break;
9770
9771                 /*
9772                  *      Verify that there are no gaps in the region
9773                  */
9774
9775                 tmp_entry = src_entry->vme_next;
9776                 if ((tmp_entry->vme_start != src_start) ||
9777                     (tmp_entry == vm_map_to_entry(src_map))) {
9778                         RETURN(KERN_INVALID_ADDRESS);
9779                 }
9780         }
9781
9782         /*
9783          * If the source should be destroyed, do it now, since the
9784          * copy was successful.
9785          */
9786         if (src_destroy) {
9787                 (void) vm_map_delete(
9788                         src_map,
9789                         vm_map_trunc_page(src_addr,
9790                                           VM_MAP_PAGE_MASK(src_map)),
9791                         src_end,
9792                         ((src_map == kernel_map) ?
9793                          VM_MAP_REMOVE_KUNWIRE :
9794                          VM_MAP_NO_FLAGS),
9795                         VM_MAP_NULL);
9796         } else {
9797                 /* fix up the damage we did in the base map */
9798                 vm_map_simplify_range(
9799                         src_map,
9800                         vm_map_trunc_page(src_addr,
9801                                           VM_MAP_PAGE_MASK(src_map)),
9802                         vm_map_round_page(src_end,
9803                                           VM_MAP_PAGE_MASK(src_map)));
9804         }
9805
9806         vm_map_unlock(src_map);
9807
9808         if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) {
9809                 vm_map_offset_t original_start, original_offset, original_end;
9810
9811                 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
9812
9813                 /* adjust alignment of first copy_entry's "vme_start" */
9814                 tmp_entry = vm_map_copy_first_entry(copy);
9815                 if (tmp_entry != vm_map_copy_to_entry(copy)) {
9816                         vm_map_offset_t adjustment;
9817
9818                         original_start = tmp_entry->vme_start;
9819                         original_offset = VME_OFFSET(tmp_entry);
9820
9821                         /* map-align the start of the first copy entry... */
9822                         adjustment = (tmp_entry->vme_start -
9823                                       vm_map_trunc_page(
9824                                               tmp_entry->vme_start,
9825                                               VM_MAP_PAGE_MASK(src_map)));
9826                         tmp_entry->vme_start -= adjustment;
9827                         VME_OFFSET_SET(tmp_entry,
9828                                        VME_OFFSET(tmp_entry) - adjustment);
9829                         copy_addr -= adjustment;
9830                         assert(tmp_entry->vme_start < tmp_entry->vme_end);
9831                         /* ... adjust for mis-aligned start of copy range */
9832                         adjustment =
9833                                 (vm_map_trunc_page(copy->offset,
9834                                                    PAGE_MASK) -
9835                                  vm_map_trunc_page(copy->offset,
9836                                                    VM_MAP_PAGE_MASK(src_map)));
9837                         if (adjustment) {
9838                                 assert(page_aligned(adjustment));
9839                                 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
9840                                 tmp_entry->vme_start += adjustment;
9841                                 VME_OFFSET_SET(tmp_entry,
9842                                                (VME_OFFSET(tmp_entry) +
9843                                                 adjustment));
9844                                 copy_addr += adjustment;
9845                                 assert(tmp_entry->vme_start < tmp_entry->vme_end);
9846                         }
9847
9848                         /*
9849                          * Assert that the adjustments haven't exposed
9850                          * more than was originally copied...
9851                          */
9852                         assert(tmp_entry->vme_start >= original_start);
9853                         assert(VME_OFFSET(tmp_entry) >= original_offset);
9854                         /*
9855                          * ... and that it did not adjust outside of a
9856                          * a single 16K page.
9857                          */
9858                         assert(vm_map_trunc_page(tmp_entry->vme_start,
9859                                                  VM_MAP_PAGE_MASK(src_map)) ==
9860                                vm_map_trunc_page(original_start,
9861                                                  VM_MAP_PAGE_MASK(src_map)));
9862                 }
9863
9864                 /* adjust alignment of last copy_entry's "vme_end" */
9865                 tmp_entry = vm_map_copy_last_entry(copy);
9866                 if (tmp_entry != vm_map_copy_to_entry(copy)) {
9867                         vm_map_offset_t adjustment;
9868
9869                         original_end = tmp_entry->vme_end;
9870
9871                         /* map-align the end of the last copy entry... */
9872                         tmp_entry->vme_end =
9873                                 vm_map_round_page(tmp_entry->vme_end,
9874                                                   VM_MAP_PAGE_MASK(src_map));
9875                         /* ... adjust for mis-aligned end of copy range */
9876                         adjustment =
9877                                 (vm_map_round_page((copy->offset +
9878                                                     copy->size),
9879                                                    VM_MAP_PAGE_MASK(src_map)) -
9880                                  vm_map_round_page((copy->offset +
9881                                                     copy->size),
9882                                                    PAGE_MASK));
9883                         if (adjustment) {
9884                                 assert(page_aligned(adjustment));
9885                                 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
9886                                 tmp_entry->vme_end -= adjustment;
9887                                 assert(tmp_entry->vme_start < tmp_entry->vme_end);
9888                         }
9889
9890                         /*
9891                          * Assert that the adjustments haven't exposed
9892                          * more than was originally copied...
9893                          */
9894                         assert(tmp_entry->vme_end <= original_end);
9895                         /*
9896                          * ... and that it did not adjust outside of a
9897                          * a single 16K page.
9898                          */
9899                         assert(vm_map_round_page(tmp_entry->vme_end,
9900                                                  VM_MAP_PAGE_MASK(src_map)) ==
9901                                vm_map_round_page(original_end,
9902                                                  VM_MAP_PAGE_MASK(src_map)));
9903                 }
9904         }
9905
9906         /* Fix-up start and end points in copy.  This is necessary */
9907         /* when the various entries in the copy object were picked */
9908         /* up from different sub-maps */
9909
9910         tmp_entry = vm_map_copy_first_entry(copy);
9911         copy_size = 0; /* compute actual size */
9912         while (tmp_entry != vm_map_copy_to_entry(copy)) {
9913                 assert(VM_MAP_PAGE_ALIGNED(
9914                                copy_addr + (tmp_entry->vme_end -
9915                                             tmp_entry->vme_start),
9916                                VM_MAP_COPY_PAGE_MASK(copy)));
9917                 assert(VM_MAP_PAGE_ALIGNED(
9918                                copy_addr,
9919                                VM_MAP_COPY_PAGE_MASK(copy)));
9920
9921                 /*
9922                  * The copy_entries will be injected directly into the
9923                  * destination map and might not be "map aligned" there...
9924                  */
9925                 tmp_entry->map_aligned = FALSE;
9926
9927                 tmp_entry->vme_end = copy_addr +
9928                         (tmp_entry->vme_end - tmp_entry->vme_start);
9929                 tmp_entry->vme_start = copy_addr;
9930                 assert(tmp_entry->vme_start < tmp_entry->vme_end);
9931                 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
9932                 copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
9933                 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
9934         }
9935
9936         if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
9937             copy_size < copy->size) {
9938                 /*
9939                  * The actual size of the VM map copy is smaller than what
9940                  * was requested by the caller.  This must be because some
9941                  * PAGE_SIZE-sized pages are missing at the end of the last
9942                  * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
9943                  * The caller might not have been aware of those missing
9944                  * pages and might not want to be aware of it, which is
9945                  * fine as long as they don't try to access (and crash on)
9946                  * those missing pages.
9947                  * Let's adjust the size of the "copy", to avoid failing
9948                  * in vm_map_copyout() or vm_map_copy_overwrite().
9949                  */
9950                 assert(vm_map_round_page(copy_size,
9951                                          VM_MAP_PAGE_MASK(src_map)) ==
9952                        vm_map_round_page(copy->size,
9953                                          VM_MAP_PAGE_MASK(src_map)));
9954                 copy->size = copy_size;
9955         }
9956
9957         *copy_result = copy;
9958         return(KERN_SUCCESS);
9959
9960 #undef  RETURN
9961 }
9962 /*
9963  *      vm_map_copy_extract:
9964  *
9965  *      Create an entry-list map copy for "len" bytes of "src_map"
9966  *      starting at "src_addr".  The copy's header is filled in by
9967  *      vm_map_remap_extract(), called with copy == FALSE and
9968  *      VM_INHERIT_SHARE; "cur_prot" and "max_prot" are handed
9969  *      straight through to that call.
9970  *
9971  *      Returns KERN_SUCCESS with a VM_MAP_COPY_NULL result when "len"
9972  *      is zero, KERN_INVALID_ADDRESS when "src_addr + len" wraps, and
9973  *      otherwise the vm_map_remap_extract() result (on failure the
9974  *      new copy is discarded and *copy_result is left untouched).
9975  */
9976 kern_return_t
9977 vm_map_copy_extract(
9978         vm_map_t                src_map,
9979         vm_map_address_t        src_addr,
9980         vm_map_size_t           len,
9981         vm_map_copy_t           *copy_result,   /* OUT */
9982         vm_prot_t               *cur_prot,      /* OUT */
9983         vm_prot_t               *max_prot)
9984 {
9985         vm_map_offset_t region_start, region_end;
9986         vm_map_copy_t   new_copy;
9987         kern_return_t   result;
9988
9989         /* Nothing to extract: succeed with a null copy. */
9990         if (len == 0) {
9991                 *copy_result = VM_MAP_COPY_NULL;
9992                 return KERN_SUCCESS;
9993         }
9994
9995         /* Refuse a range whose end address wraps around. */
9996         region_end = src_addr + len;
9997         if (region_end < src_addr) {
9998                 return KERN_INVALID_ADDRESS;
9999         }
10000
10001         /*
10002          * Page-aligned bounds of the region.
10003          * NOTE(review): neither value is read again in this function;
10004          * the call below is given the unaligned src_addr and len.
10005          */
10006         region_start = vm_map_trunc_page(src_addr, PAGE_MASK);
10007         region_end = vm_map_round_page(region_end, PAGE_MASK);
10008
10009         /* Set up an empty, pageable, entry-list copy. */
10010         new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
10011         new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
10012         vm_map_copy_last_entry(new_copy) = vm_map_copy_to_entry(new_copy);
10013         vm_map_copy_first_entry(new_copy) = vm_map_copy_to_entry(new_copy);
10014         new_copy->type = VM_MAP_COPY_ENTRY_LIST;
10015         new_copy->cpy_hdr.nentries = 0;
10016         new_copy->cpy_hdr.entries_pageable = TRUE;
10017         vm_map_store_init(&new_copy->cpy_hdr);
10018         new_copy->offset = 0;
10019         new_copy->size = len;
10020
10021         /* Extract the source range into the copy's header. */
10022         result = vm_map_remap_extract(src_map, src_addr, len,
10023                                       FALSE,    /* copy */
10024                                       &new_copy->cpy_hdr,
10025                                       cur_prot, max_prot,
10026                                       VM_INHERIT_SHARE,
10027                                       TRUE);    /* pageable */
10028         if (result == KERN_SUCCESS) {
10029                 *copy_result = new_copy;
10030         } else {
10031                 vm_map_copy_discard(new_copy);  /* failed: free the copy */
10032         }
10033         return result;
10034 }
10035
10036 /*
10037  *      vm_map_copyin_object:
10038  *
10039  *      Build a VM_MAP_COPY_OBJECT map copy that holds "object"
10040  *      directly, recording the region's "offset" and "size", and
10041  *      pass it back through "copy_result".  The object reference
10042  *      donated by the caller is stored as-is; no additional
10043  *      reference is taken here.  Always returns KERN_SUCCESS.
10044  */
10045 kern_return_t
10046 vm_map_copyin_object(
10047         vm_object_t             object,
10048         vm_object_offset_t      offset,         /* region start within object */
10049         vm_object_size_t        size,           /* region length within object */
10050         vm_map_copy_t           *copy_result)   /* OUT */
10051 {
10052         vm_map_copy_t   object_copy;    /* copy under construction */
10053
10054         /* Allocate the copy from vm_map_copy_zone and fill it in. */
10055         object_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
10056         object_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
10057         object_copy->type = VM_MAP_COPY_OBJECT;
10058         object_copy->cpy_object = object;       /* donated reference */
10059         object_copy->offset = offset;
10060         object_copy->size = size;
10061
10062         /* Hand the finished copy to the caller. */
10063         *copy_result = object_copy;
10064
10065         return KERN_SUCCESS;
10066 }
10067
      /*
       *      vm_map_fork_share:
       *
       *      Share the memory mapped by "old_entry" (an entry of
       *      "old_map") with "new_map".  A clone of the entry is linked
       *      after the current last entry of new_map, both entries are
       *      marked is_shared, and one reference is taken on the
       *      backing object (or submap) for the clone.
       *
       *      A non-submap entry first gets a usable object: one is
       *      allocated if the entry had none, and a shadow is created
       *      in the three cases detailed in the body.  An object still
       *      using MEMORY_OBJECT_COPY_SYMMETRIC is switched to
       *      MEMORY_OBJECT_COPY_DELAY.
       */
10068 static void
10069 vm_map_fork_share(
10070         vm_map_t        old_map,
10071         vm_map_entry_t  old_entry,
10072         vm_map_t        new_map)
10073 {
10074         vm_object_t     object;
10075         vm_map_entry_t  new_entry;
10076
10077         /*
10078          *      New sharing code.  New map entry
10079          *      references original object.  Internal
10080          *      objects use asynchronous copy algorithm for
10081          *      future copies.  First make sure we have
10082          *      the right object.  If we need a shadow,
10083          *      or someone else already has one, then
10084          *      make a new shadow and share it.
10085          */
10086
10087         object = VME_OBJECT(old_entry);
10088         if (old_entry->is_sub_map) {
                      /*
                       *      Submap entry: "object" is not used on this
                       *      path.  When the entry uses a nested pmap,
                       *      nest the submap's pmap into the new map's
                       *      pmap over the same address range; a failure
                       *      there is fatal.
                       */
10089                 assert(old_entry->wired_count == 0);
10090 #ifndef NO_NESTED_PMAP
10091                 if(old_entry->use_pmap) {
10092                         kern_return_t   result;
10093
10094                         result = pmap_nest(new_map->pmap,
10095                                            (VME_SUBMAP(old_entry))->pmap,
10096                                            (addr64_t)old_entry->vme_start,
10097                                            (addr64_t)old_entry->vme_start,
10098                                            (uint64_t)(old_entry->vme_end - old_entry->vme_start));
10099                         if(result)
10100                                 panic("vm_map_fork_share: pmap_nest failed!");
10101                 }
10102 #endif  /* NO_NESTED_PMAP */
10103         } else if (object == VM_OBJECT_NULL) {
                      /*
                       *      Nothing backs the entry yet: allocate an
                       *      object covering the entry, at offset 0.
                       */
10104                 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
10105                                                             old_entry->vme_start));
10106                 VME_OFFSET_SET(old_entry, 0);
10107                 VME_OBJECT_SET(old_entry, object);
10108                 old_entry->use_pmap = TRUE;
10109                 assert(!old_entry->needs_copy);
10110         } else if (object->copy_strategy !=
10111                    MEMORY_OBJECT_COPY_SYMMETRIC) {
10112
10113                 /*
10114                  *      We are already using an asymmetric
10115                  *      copy, and therefore we already have
10116                  *      the right object.
10117                  */
10118
10119                 assert(! old_entry->needs_copy);
10120         }
10121         else if (old_entry->needs_copy ||       /* case 1 */
10122                  object->shadowed ||            /* case 2 */
10123                  (!object->true_share &&        /* case 3 */
10124                   !old_entry->is_shared &&
10125                   (object->vo_size >
10126                    (vm_map_size_t)(old_entry->vme_end -
10127                                    old_entry->vme_start)))) {
10128
10129                 /*
10130                  *      We need to create a shadow.
10131                  *      There are three cases here.
10132                  *      In the first case, we need to
10133                  *      complete a deferred symmetrical
10134                  *      copy that we participated in.
10135                  *      In the second and third cases,
10136                  *      we need to create the shadow so
10137                  *      that changes that we make to the
10138                  *      object do not interfere with
10139                  *      any symmetrical copies which
10140                  *      have occurred (case 2) or which
10141                  *      might occur (case 3).
10142                  *
10143                  *      The first case is when we had
10144                  *      deferred shadow object creation
10145                  *      via the entry->needs_copy mechanism.
10146                  *      This mechanism only works when
10147                  *      only one entry points to the source
10148                  *      object, and we are about to create
10149                  *      a second entry pointing to the
10150                  *      same object. The problem is that
10151                  *      there is no way of mapping from
10152                  *      an object to the entries pointing
10153                  *      to it. (Deferred shadow creation
10154                  *      works with one entry because it occurs
10155                  *      at fault time, and we walk from the
10156                  *      entry to the object when handling
10157                  *      the fault.)
10158                  *
10159                  *      The second case is when the object
10160                  *      to be shared has already been copied
10161                  *      with a symmetric copy, but we point
10162                  *      directly to the object without
10163                  *      needs_copy set in our entry. (This
10164                  *      can happen because different ranges
10165                  *      of an object can be pointed to by
10166                  *      different entries. In particular,
10167                  *      a single entry pointing to an object
10168                  *      can be split by a call to vm_inherit,
10169                  *      which, combined with task_create, can
10170                  *      result in the different entries
10171                  *      having different needs_copy values.)
10172                  *      The shadowed flag in the object allows
10173                  *      us to detect this case. The problem
10174                  *      with this case is that if this object
10175                  *      has or will have shadows, then we
10176                  *      must not perform an asymmetric copy
10177                  *      of this object, since such a copy
10178                  *      allows the object to be changed, which
10179                  *      will break the previous symmetrical
10180                  *      copies (which rely upon the object
10181                  *      not changing). In a sense, the shadowed
10182                  *      flag says "don't change this object".
10183                  *      We fix this by creating a shadow
10184                  *      object for this object, and sharing
10185                  *      that. This works because we are free
10186                  *      to change the shadow object (and thus
10187                  *      to use an asymmetric copy strategy);
10188                  *      this is also semantically correct,
10189                  *      since this object is temporary, and
10190                  *      therefore a copy of the object is
10191                  *      as good as the object itself. (This
10192                  *      is not true for permanent objects,
10193                  *      since the pager needs to see changes,
10194                  *      which won't happen if the changes
10195                  *      are made to a copy.)
10196                  *
10197                  *      The third case is when the object
10198                  *      to be shared has parts sticking
10199                  *      outside of the entry we're working
10200                  *      with, and thus may in the future
10201                  *      be subject to a symmetrical copy.
10202                  *      (This is a preemptive version of
10203                  *      case 2.)
10204                  */
10205                 VME_OBJECT_SHADOW(old_entry,
10206                                   (vm_map_size_t) (old_entry->vme_end -
10207                                                    old_entry->vme_start));
10208
10209                 /*
10210                  *      If we're making a shadow for other than
10211                  *      copy on write reasons, then we have
10212                  *      to remove write permission.
10213                  */
10214
10215                 if (!old_entry->needs_copy &&
10216                     (old_entry->protection & VM_PROT_WRITE)) {
10217                         vm_prot_t prot;
10218
10219                         prot = old_entry->protection & ~VM_PROT_WRITE;
10220
10221                         if (override_nx(old_map, VME_ALIAS(old_entry)) && prot)
10222                                 prot |= VM_PROT_EXECUTE;
10223
10224                         if (old_map->mapped_in_other_pmaps) {
10225                                 vm_object_pmap_protect(
10226                                         VME_OBJECT(old_entry),
10227                                         VME_OFFSET(old_entry),
10228                                         (old_entry->vme_end -
10229                                          old_entry->vme_start),
10230                                         PMAP_NULL,
10231                                         old_entry->vme_start,
10232                                         prot);
10233                         } else {
10234                                 pmap_protect(old_map->pmap,
10235                                              old_entry->vme_start,
10236                                              old_entry->vme_end,
10237                                              prot);
10238                         }
10239                 }
10240
10241                 old_entry->needs_copy = FALSE;
                      /*
                       *      Re-read the entry's object: presumably
                       *      VME_OBJECT_SHADOW() installed the new
                       *      shadow there -- TODO confirm.
                       */
10242                 object = VME_OBJECT(old_entry);
10243         }
10244
10245
10246         /*
10247          *      If object was using a symmetric copy strategy,
10248          *      change its copy strategy to the default
10249          *      asymmetric copy strategy, which is copy_delay
10250          *      in the non-norma case and copy_call in the
10251          *      norma case. Bump the reference count for the
10252          *      new entry.
10253          */
10254
10255         if(old_entry->is_sub_map) {
                      /* reference the submap while holding its lock */
10256                 vm_map_lock(VME_SUBMAP(old_entry));
10257                 vm_map_reference(VME_SUBMAP(old_entry));
10258                 vm_map_unlock(VME_SUBMAP(old_entry));
10259         } else {
                      /* the new reference and the strategy change share one object lock hold */
10260                 vm_object_lock(object);
10261                 vm_object_reference_locked(object);
10262                 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
10263                         object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
10264                 }
10265                 vm_object_unlock(object);
10266         }
10267
10268         /*
10269          *      Clone the entry, using object ref from above.
10270          *      Mark both entries as shared.
10271          */
10272
10273         new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
10274                                                           * map or descendants */
10275         vm_map_entry_copy(new_entry, old_entry);
10276         old_entry->is_shared = TRUE;
10277         new_entry->is_shared = TRUE;
10278
10279         /*
10280          *      Insert the entry into the new map -- we
10281          *      know we're inserting at the end of the new
10282          *      map.
10283          */
10284
10285         vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
10286
10287         /*
10288          *      Update the physical map
10289          */
10290
10291         if (old_entry->is_sub_map) {
10292                 /* Bill Angell pmap support goes here */
10293         } else {
                      /*
                       *      Arguments are presumably (dst pmap, src pmap,
                       *      dst address, length, src address) -- TODO
                       *      confirm against pmap_copy()'s declaration.
                       */
10294                 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
10295                           old_entry->vme_end - old_entry->vme_start,
10296                           old_entry->vme_start);
10297         }
10298 }
10299
10300 static boolean_t
10301 vm_map_fork_copy(
10302         vm_map_t        old_map,
10303         vm_map_entry_t  *old_entry_p,
10304         vm_map_t        new_map)
10305 {
10306         vm_map_entry_t old_entry = *old_entry_p;
10307         vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
10308         vm_map_offset_t start = old_entry->vme_start;
10309         vm_map_copy_t copy;
10310         vm_map_entry_t last = vm_map_last_entry(new_map);
10311
10312         vm_map_unlock(old_map);
10313         /*
10314          *      Use maxprot version of copyin because we
10315          *      care about whether this memory can ever
10316          *      be accessed, not just whether it's accessible
10317          *      right now.
10318          */
10319         if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, &copy)
10320             != KERN_SUCCESS) {
10321                 /*
10322                  *      The map might have changed while it
10323                  *      was unlocked, check it again.  Skip
10324                  *      any blank space or permanently
10325                  *      unreadable region.
10326                  */
10327                 vm_map_lock(old_map);
10328                 if (!vm_map_lookup_entry(old_map, start, &last) ||
10329                     (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
10330                         last = last->vme_next;
10331                 }
10332                 *old_entry_p = last;
10333
10334                 /*
10335                  * XXX  For some error returns, want to
10336                  * XXX  skip to the next element.  Note
10337                  *      that INVALID_ADDRESS and
10338                  *      PROTECTION_FAILURE are handled above.
10339                  */
10340
10341                 return FALSE;
10342         }
10343
10344         /*
10345          *      Insert the copy into the new map
10346          */
10347
10348         vm_map_copy_insert(new_map, last, copy);
10349
10350         /*
10351          *      Pick up the traversal at the end of
10352          *      the copied region.
10353          */
10354
10355         vm_map_lock(old_map);
10356         start += entry_size;
10357         if (! vm_map_lookup_entry(old_map, start, &last)) {
10358                 last = last->vme_next;
10359         } else {
10360                 if (last->vme_start == start) {
10361                         /*
10362                          * No need to clip here and we don't
10363                          * want to cause any unnecessary
10364                          * unnesting...
10365                          */
10366                 } else {
10367                         vm_map_clip_start(old_map, last, start);
10368                 }
10369         }
10370         *old_entry_p = last;
10371
10372         return TRUE;
10373 }
10374
10375 /*
10376  *      vm_map_fork:
10377  *
10378  *      Create and return a new map based on the old
10379  *      map, according to the inheritance values on the
10380  *      regions in that map.
10381  *
10382  *      The source map must not be locked.
10383  */
10384 vm_map_t
10385 vm_map_fork(
10386         ledger_t        ledger,
10387         vm_map_t        old_map)
10388 {
10389         pmap_t          new_pmap;
10390         vm_map_t        new_map;
10391         vm_map_entry_t  old_entry;
10392         vm_map_size_t   new_size = 0, entry_size;
10393         vm_map_entry_t  new_entry;
10394         boolean_t       src_needs_copy;
10395         boolean_t       new_entry_needs_copy;
10396         boolean_t       pmap_is64bit;
10397
10398         pmap_is64bit =
10399 #if defined(__i386__) || defined(__x86_64__)
10400                                old_map->pmap->pm_task_map != TASK_MAP_32BIT;
10401 #else
10402 #error Unknown architecture.
10403 #endif
10404
10405         new_pmap = pmap_create(ledger, (vm_map_size_t) 0, pmap_is64bit);
10406
10407         vm_map_reference_swap(old_map);
10408         vm_map_lock(old_map);
10409
10410         new_map = vm_map_create(new_pmap,
10411                                 old_map->min_offset,
10412                                 old_map->max_offset,
10413                                 old_map->hdr.entries_pageable);
10414         /* inherit the parent map's page size */
10415         vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
10416         for (
10417                 old_entry = vm_map_first_entry(old_map);
10418                 old_entry != vm_map_to_entry(old_map);
10419                 ) {
10420
10421                 entry_size = old_entry->vme_end - old_entry->vme_start;
10422
10423                 switch (old_entry->inheritance) {
10424                 case VM_INHERIT_NONE:
10425                         break;
10426
10427                 case VM_INHERIT_SHARE:
10428                         vm_map_fork_share(old_map, old_entry, new_map);
10429                         new_size += entry_size;
10430                         break;
10431
10432                 case VM_INHERIT_COPY:
10433
10434                         /*
10435                          *      Inline the copy_quickly case;
10436                          *      upon failure, fall back on call
10437                          *      to vm_map_fork_copy.
10438                          */
10439
10440                         if(old_entry->is_sub_map)
10441                                 break;
10442                         if ((old_entry->wired_count != 0) ||
10443                             ((VME_OBJECT(old_entry) != NULL) &&
10444                              (VME_OBJECT(old_entry)->true_share))) {
10445                                 goto slow_vm_map_fork_copy;
10446                         }
10447
10448                         new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
10449                         vm_map_entry_copy(new_entry, old_entry);
10450                         if (new_entry->is_sub_map) {
10451                                 /* clear address space specifics */
10452                                 new_entry->use_pmap = FALSE;
10453                         }
10454
10455                         if (! vm_object_copy_quickly(
10456                                     &VME_OBJECT(new_entry),
10457                                     VME_OFFSET(old_entry),
10458                                     (old_entry->vme_end -
10459                                      old_entry->vme_start),
10460                                     &src_needs_copy,
10461                                     &new_entry_needs_copy)) {
10462                                 vm_map_entry_dispose(new_map, new_entry);
10463                                 goto slow_vm_map_fork_copy;
10464                         }
10465
10466                         /*
10467                          *      Handle copy-on-write obligations
10468                          */
10469
10470                         if (src_needs_copy && !old_entry->needs_copy) {
10471                                 vm_prot_t prot;
10472
10473                                 prot = old_entry->protection & ~VM_PROT_WRITE;
10474
10475                                 if (override_nx(old_map, VME_ALIAS(old_entry))
10476                                     && prot)
10477                                         prot |= VM_PROT_EXECUTE;
10478
10479                                 vm_object_pmap_protect(
10480                                         VME_OBJECT(old_entry),
10481                                         VME_OFFSET(old_entry),
10482                                         (old_entry->vme_end -
10483                                          old_entry->vme_start),
10484                                         ((old_entry->is_shared
10485                                           || old_map->mapped_in_other_pmaps)
10486                                          ? PMAP_NULL :
10487                                          old_map->pmap),
10488                                         old_entry->vme_start,
10489                                         prot);
10490
10491                                 assert(old_entry->wired_count == 0);
10492                                 old_entry->needs_copy = TRUE;
10493                         }
10494                         new_entry->needs_copy = new_entry_needs_copy;
10495
10496                         /*
10497                          *      Insert the entry at the end
10498                          *      of the map.
10499                          */
10500
10501                         vm_map_store_entry_link(new_map, vm_map_last_entry(new_map),
10502                                           new_entry);
10503                         new_size += entry_size;
10504                         break;
10505
10506                 slow_vm_map_fork_copy:
10507                         if (vm_map_fork_copy(old_map, &old_entry, new_map)) {
10508                                 new_size += entry_size;
10509                         }
10510                         continue;
10511                 }
10512                 old_entry = old_entry->vme_next;
10513         }
10514
10515
10516         new_map->size = new_size;
10517         vm_map_unlock(old_map);
10518         vm_map_deallocate(old_map);
10519
10520         return(new_map);
10521 }
10522
10523 /*
10524  * vm_map_exec:
10525  *
10526  *      Setup the "new_map" with the proper execution environment according
10527  *      to the type of executable (platform, 64bit, chroot environment).
10528  *      Map the comm page and shared region, etc...
10529  */
10530 kern_return_t
10531 vm_map_exec(
10532         vm_map_t        new_map,
10533         task_t          task,
10534         void            *fsroot,
10535         cpu_type_t      cpu)
10536 {
10537         SHARED_REGION_TRACE_DEBUG(
10538                 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
10539                  (void *)VM_KERNEL_ADDRPERM(current_task()),
10540                  (void *)VM_KERNEL_ADDRPERM(new_map),
10541                  (void *)VM_KERNEL_ADDRPERM(task),
10542                  (void *)VM_KERNEL_ADDRPERM(fsroot),
10543                  cpu));
10544         (void) vm_commpage_enter(new_map, task);
10545         (void) vm_shared_region_enter(new_map, task, fsroot, cpu);
10546         SHARED_REGION_TRACE_DEBUG(
10547                 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
10548                  (void *)VM_KERNEL_ADDRPERM(current_task()),
10549                  (void *)VM_KERNEL_ADDRPERM(new_map),
10550                  (void *)VM_KERNEL_ADDRPERM(task),
10551                  (void *)VM_KERNEL_ADDRPERM(fsroot),
10552                  cpu));
10553         return KERN_SUCCESS;
10554 }
10555
10556 /*
10557  *      vm_map_lookup_locked:
10558  *
10559  *      Finds the VM object, offset, and
10560  *      protection for a given virtual address in the
10561  *      specified map, assuming a page fault of the
10562  *      type specified.
10563  *
10564  *      Returns the (object, offset, protection) for
10565  *      this address, whether it is wired down, and whether
10566  *      this map has the only reference to the data in question.
10567  *      In order to later verify this lookup, a "version"
10568  *      is returned.
10569  *
10570  *      The map MUST be locked by the caller and WILL be
10571  *      locked on exit.  In order to guarantee the
10572  *      existence of the returned object, it is returned
10573  *      locked.
10574  *
10575  *      If a lookup is requested with "write protection"
10576  *      specified, the map may be changed to perform virtual
10577  *      copying operations, although the data referenced will
10578  *      remain the same.
10579  */
10580 kern_return_t
10581 vm_map_lookup_locked(
10582         vm_map_t                *var_map,       /* IN/OUT */
10583         vm_map_offset_t         vaddr,
10584         vm_prot_t               fault_type,
10585         int                     object_lock_type,
10586         vm_map_version_t        *out_version,   /* OUT */
10587         vm_object_t             *object,        /* OUT */
10588         vm_object_offset_t      *offset,        /* OUT */
10589         vm_prot_t               *out_prot,      /* OUT */
10590         boolean_t               *wired,         /* OUT */
10591         vm_object_fault_info_t  fault_info,     /* OUT */
10592         vm_map_t                *real_map)
10593 {
10594         vm_map_entry_t                  entry;
10595         register vm_map_t               map = *var_map;
10596         vm_map_t                        old_map = *var_map;
10597         vm_map_t                        cow_sub_map_parent = VM_MAP_NULL;
10598         vm_map_offset_t                 cow_parent_vaddr = 0;
10599         vm_map_offset_t                 old_start = 0;
10600         vm_map_offset_t                 old_end = 0;
10601         register vm_prot_t              prot;
10602         boolean_t                       mask_protections;
10603         boolean_t                       force_copy;
10604         vm_prot_t                       original_fault_type;
10605
10606         /*
10607          * VM_PROT_MASK means that the caller wants us to use "fault_type"
10608          * as a mask against the mapping's actual protections, not as an
10609          * absolute value.
10610          */
10611         mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
10612         force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
10613         fault_type &= VM_PROT_ALL;
10614         original_fault_type = fault_type;
10615
10616         *real_map = map;
10617
10618 RetryLookup:
10619         fault_type = original_fault_type;
10620
10621         /*
10622          *      If the map has an interesting hint, try it before calling
10623          *      full blown lookup routine.
10624          */
10625         entry = map->hint;
10626
10627         if ((entry == vm_map_to_entry(map)) ||
10628             (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
10629                 vm_map_entry_t  tmp_entry;
10630
10631                 /*
10632                  *      Entry was either not a valid hint, or the vaddr
10633                  *      was not contained in the entry, so do a full lookup.
10634                  */
10635                 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
10636                         if((cow_sub_map_parent) && (cow_sub_map_parent != map))
10637                                 vm_map_unlock(cow_sub_map_parent);
10638                         if((*real_map != map)
10639                            && (*real_map != cow_sub_map_parent))
10640                                 vm_map_unlock(*real_map);
10641                         return KERN_INVALID_ADDRESS;
10642                 }
10643
10644                 entry = tmp_entry;
10645         }
10646         if(map == old_map) {
10647                 old_start = entry->vme_start;
10648                 old_end = entry->vme_end;
10649         }
10650
10651         /*
10652          *      Handle submaps.  Drop lock on upper map, submap is
10653          *      returned locked.
10654          */
10655
10656 submap_recurse:
10657         if (entry->is_sub_map) {
10658                 vm_map_offset_t         local_vaddr;
10659                 vm_map_offset_t         end_delta;
10660                 vm_map_offset_t         start_delta;
10661                 vm_map_entry_t          submap_entry;
10662                 boolean_t               mapped_needs_copy=FALSE;
10663
10664                 local_vaddr = vaddr;
10665
10666                 if ((entry->use_pmap && !(fault_type & VM_PROT_WRITE))) {
10667                         /* if real_map equals map we unlock below */
10668                         if ((*real_map != map) &&
10669                             (*real_map != cow_sub_map_parent))
10670                                 vm_map_unlock(*real_map);
10671                         *real_map = VME_SUBMAP(entry);
10672                 }
10673
10674                 if(entry->needs_copy && (fault_type & VM_PROT_WRITE)) {
10675                         if (!mapped_needs_copy) {
10676                                 if (vm_map_lock_read_to_write(map)) {
10677                                         vm_map_lock_read(map);
10678                                         *real_map = map;
10679                                         goto RetryLookup;
10680                                 }
10681                                 vm_map_lock_read(VME_SUBMAP(entry));
10682                                 *var_map = VME_SUBMAP(entry);
10683                                 cow_sub_map_parent = map;
10684                                 /* reset base to map before cow object */
10685                                 /* this is the map which will accept   */
10686                                 /* the new cow object */
10687                                 old_start = entry->vme_start;
10688                                 old_end = entry->vme_end;
10689                                 cow_parent_vaddr = vaddr;
10690                                 mapped_needs_copy = TRUE;
10691                         } else {
10692                                 vm_map_lock_read(VME_SUBMAP(entry));
10693                                 *var_map = VME_SUBMAP(entry);
10694                                 if((cow_sub_map_parent != map) &&
10695                                    (*real_map != map))
10696                                         vm_map_unlock(map);
10697                         }
10698                 } else {
10699                         vm_map_lock_read(VME_SUBMAP(entry));
10700                         *var_map = VME_SUBMAP(entry);
10701                         /* leave map locked if it is a target */
10702                         /* cow sub_map above otherwise, just  */
10703                         /* follow the maps down to the object */
10704                         /* here we unlock knowing we are not  */
10705                         /* revisiting the map.  */
10706                         if((*real_map != map) && (map != cow_sub_map_parent))
10707                                 vm_map_unlock_read(map);
10708                 }
10709
10710                 map = *var_map;
10711
10712                 /* calculate the offset in the submap for vaddr */
10713                 local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);
10714
10715         RetrySubMap:
10716                 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
10717                         if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
10718                                 vm_map_unlock(cow_sub_map_parent);
10719                         }
10720                         if((*real_map != map)
10721                            && (*real_map != cow_sub_map_parent)) {
10722                                 vm_map_unlock(*real_map);
10723                         }
10724                         *real_map = map;
10725                         return KERN_INVALID_ADDRESS;
10726                 }
10727
10728                 /* find the attenuated shadow of the underlying object */
10729                 /* on our target map */
10730
10731                 /* in english the submap object may extend beyond the     */
10732                 /* region mapped by the entry or, may only fill a portion */
10733                 /* of it.  For our purposes, we only care if the object   */
10734                 /* doesn't fill.  In this case the area which will        */
10735                 /* ultimately be clipped in the top map will only need    */
10736                 /* to be as big as the portion of the underlying entry    */
10737                 /* which is mapped */
10738                 start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
10739                         submap_entry->vme_start - VME_OFFSET(entry) : 0;
10740
10741                 end_delta =
10742                         (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
10743                         submap_entry->vme_end ?
10744                         0 : (VME_OFFSET(entry) +
10745                              (old_end - old_start))
10746                         - submap_entry->vme_end;
10747
10748                 old_start += start_delta;
10749                 old_end -= end_delta;
10750
10751                 if(submap_entry->is_sub_map) {
10752                         entry = submap_entry;
10753                         vaddr = local_vaddr;
10754                         goto submap_recurse;
10755                 }
10756
10757                 if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) {
10758
10759                         vm_object_t     sub_object, copy_object;
10760                         vm_object_offset_t copy_offset;
10761                         vm_map_offset_t local_start;
10762                         vm_map_offset_t local_end;
10763                         boolean_t               copied_slowly = FALSE;
10764
10765                         if (vm_map_lock_read_to_write(map)) {
10766                                 vm_map_lock_read(map);
10767                                 old_start -= start_delta;
10768                                 old_end += end_delta;
10769                                 goto RetrySubMap;
10770                         }
10771
10772
10773                         sub_object = VME_OBJECT(submap_entry);
10774                         if (sub_object == VM_OBJECT_NULL) {
10775                                 sub_object =
10776                                         vm_object_allocate(
10777                                                 (vm_map_size_t)
10778                                                 (submap_entry->vme_end -
10779                                                  submap_entry->vme_start));
10780                                 VME_OBJECT_SET(submap_entry, sub_object);
10781                                 VME_OFFSET_SET(submap_entry, 0);
10782                         }
10783                         local_start =  local_vaddr -
10784                                 (cow_parent_vaddr - old_start);
10785                         local_end = local_vaddr +
10786                                 (old_end - cow_parent_vaddr);
10787                         vm_map_clip_start(map, submap_entry, local_start);
10788                         vm_map_clip_end(map, submap_entry, local_end);
10789                         if (submap_entry->is_sub_map) {
10790                                 /* unnesting was done when clipping */
10791                                 assert(!submap_entry->use_pmap);
10792                         }
10793
10794                         /* This is the COW case, lets connect */
10795                         /* an entry in our space to the underlying */
10796                         /* object in the submap, bypassing the  */
10797                         /* submap. */
10798
10799
10800                         if(submap_entry->wired_count != 0 ||
10801                            (sub_object->copy_strategy ==
10802                             MEMORY_OBJECT_COPY_NONE)) {
10803                                 vm_object_lock(sub_object);
10804                                 vm_object_copy_slowly(sub_object,
10805                                                       VME_OFFSET(submap_entry),
10806                                                       (submap_entry->vme_end -
10807                                                        submap_entry->vme_start),
10808                                                       FALSE,
10809                                                       &copy_object);
10810                                 copied_slowly = TRUE;
10811                         } else {
10812
10813                                 /* set up shadow object */
10814                                 copy_object = sub_object;
10815                                 vm_object_reference(copy_object);
10816                                 sub_object->shadowed = TRUE;
10817                                 assert(submap_entry->wired_count == 0);
10818                                 submap_entry->needs_copy = TRUE;
10819
10820                                 prot = submap_entry->protection & ~VM_PROT_WRITE;
10821
10822                                 if (override_nx(old_map,
10823                                                 VME_ALIAS(submap_entry))
10824                                     && prot)
10825                                         prot |= VM_PROT_EXECUTE;
10826
10827                                 vm_object_pmap_protect(
10828                                         sub_object,
10829                                         VME_OFFSET(submap_entry),
10830                                         submap_entry->vme_end -
10831                                         submap_entry->vme_start,
10832                                         (submap_entry->is_shared
10833                                          || map->mapped_in_other_pmaps) ?
10834                                         PMAP_NULL : map->pmap,
10835                                         submap_entry->vme_start,
10836                                         prot);
10837                         }
10838
10839                         /*
10840                          * Adjust the fault offset to the submap entry.
10841                          */
10842                         copy_offset = (local_vaddr -
10843                                        submap_entry->vme_start +
10844                                        VME_OFFSET(submap_entry));
10845
10846                         /* This works differently than the  */
10847                         /* normal submap case. We go back  */
10848                         /* to the parent of the cow map and*/
10849                         /* clip out the target portion of  */
10850                         /* the sub_map, substituting the   */
10851                         /* new copy object,                */
10852
10853                         vm_map_unlock(map);
10854                         local_start = old_start;
10855                         local_end = old_end;
10856                         map = cow_sub_map_parent;
10857                         *var_map = cow_sub_map_parent;
10858                         vaddr = cow_parent_vaddr;
10859                         cow_sub_map_parent = NULL;
10860
10861                         if(!vm_map_lookup_entry(map,
10862                                                 vaddr, &entry)) {
10863                                 vm_object_deallocate(
10864                                         copy_object);
10865                                 vm_map_lock_write_to_read(map);
10866                                 return KERN_INVALID_ADDRESS;
10867                         }
10868
10869                         /* clip out the portion of space */
10870                         /* mapped by the sub map which   */
10871                         /* corresponds to the underlying */
10872                         /* object */
10873
10874                         /*
10875                          * Clip (and unnest) the smallest nested chunk
10876                          * possible around the faulting address...
10877                          */
10878                         local_start = vaddr & ~(pmap_nesting_size_min - 1);
10879                         local_end = local_start + pmap_nesting_size_min;
10880                         /*
10881                          * ... but don't go beyond the "old_start" to "old_end"
10882                          * range, to avoid spanning over another VM region
10883                          * with a possibly different VM object and/or offset.
10884                          */
10885                         if (local_start < old_start) {
10886                                 local_start = old_start;
10887                         }
10888                         if (local_end > old_end) {
10889                                 local_end = old_end;
10890                         }
10891                         /*
10892                          * Adjust copy_offset to the start of the range.
10893                          */
10894                         copy_offset -= (vaddr - local_start);
10895
10896                         vm_map_clip_start(map, entry, local_start);
10897                         vm_map_clip_end(map, entry, local_end);
10898                         if (entry->is_sub_map) {
10899                                 /* unnesting was done when clipping */
10900                                 assert(!entry->use_pmap);
10901                         }
10902
10903                         /* substitute copy object for */
10904                         /* shared map entry           */
10905                         vm_map_deallocate(VME_SUBMAP(entry));
10906                         assert(!entry->iokit_acct);
10907                         entry->is_sub_map = FALSE;
10908                         entry->use_pmap = TRUE;
10909                         VME_OBJECT_SET(entry, copy_object);
10910
10911                         /* propagate the submap entry's protections */
10912                         entry->protection |= submap_entry->protection;
10913                         entry->max_protection |= submap_entry->max_protection;
10914
10915                         if(copied_slowly) {
10916                                 VME_OFFSET_SET(entry, local_start - old_start);
10917                                 entry->needs_copy = FALSE;
10918                                 entry->is_shared = FALSE;
10919                         } else {
10920                                 VME_OFFSET_SET(entry, copy_offset);
10921                                 assert(entry->wired_count == 0);
10922                                 entry->needs_copy = TRUE;
10923                                 if(entry->inheritance == VM_INHERIT_SHARE)
10924                                         entry->inheritance = VM_INHERIT_COPY;
10925                                 if (map != old_map)
10926                                         entry->is_shared = TRUE;
10927                         }
10928                         if(entry->inheritance == VM_INHERIT_SHARE)
10929                                 entry->inheritance = VM_INHERIT_COPY;
10930
10931                         vm_map_lock_write_to_read(map);
10932                 } else {
10933                         if((cow_sub_map_parent)
10934                            && (cow_sub_map_parent != *real_map)
10935                            && (cow_sub_map_parent != map)) {
10936                                 vm_map_unlock(cow_sub_map_parent);
10937                         }
10938                         entry = submap_entry;
10939                         vaddr = local_vaddr;
10940                 }
10941         }
10942
10943         /*
10944          *      Check whether this task is allowed to have
10945          *      this page.
10946          */
10947
10948         prot = entry->protection;
10949
10950         if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
10951                 /*
10952                  * HACK -- if not a stack, then allow execution
10953                  */
10954                 prot |= VM_PROT_EXECUTE;
10955         }
10956
10957         if (mask_protections) {
10958                 fault_type &= prot;
10959                 if (fault_type == VM_PROT_NONE) {
10960                         goto protection_failure;
10961                 }
10962         }
10963         if ((fault_type & (prot)) != fault_type) {
10964         protection_failure:
10965                 if (*real_map != map) {
10966                         vm_map_unlock(*real_map);
10967                 }
10968                 *real_map = map;
10969
10970                 if ((fault_type & VM_PROT_EXECUTE) && prot)
10971                         log_stack_execution_failure((addr64_t)vaddr, prot);
10972
10973                 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
10974                 return KERN_PROTECTION_FAILURE;
10975         }
10976
10977         /*
10978          *      If this page is not pageable, we have to get
10979          *      it for all possible accesses.
10980          */
10981
10982         *wired = (entry->wired_count != 0);
10983         if (*wired)
10984                 fault_type = prot;
10985
10986         /*
10987          *      If the entry was copy-on-write, we either ...
10988          */
10989
10990         if (entry->needs_copy) {
10991                 /*
10992                  *      If we want to write the page, we may as well
10993                  *      handle that now since we've got the map locked.
10994                  *
10995                  *      If we don't need to write the page, we just
10996                  *      demote the permissions allowed.
10997                  */
10998
10999                 if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
11000                         /*
11001                          *      Make a new object, and place it in the
11002                          *      object chain.  Note that no new references
11003                          *      have appeared -- one just moved from the
11004                          *      map to the new object.
11005                          */
11006
11007                         if (vm_map_lock_read_to_write(map)) {
11008                                 vm_map_lock_read(map);
11009                                 goto RetryLookup;
11010                         }
11011                         VME_OBJECT_SHADOW(entry,
11012                                           (vm_map_size_t) (entry->vme_end -
11013                                                            entry->vme_start));
11014
11015                         VME_OBJECT(entry)->shadowed = TRUE;
11016                         entry->needs_copy = FALSE;
11017                         vm_map_lock_write_to_read(map);
11018                 }
11019                 else {
11020                         /*
11021                          *      We're attempting to read a copy-on-write
11022                          *      page -- don't allow writes.
11023                          */
11024
11025                         prot &= (~VM_PROT_WRITE);
11026                 }
11027         }
11028
11029         /*
11030          *      Create an object if necessary.
11031          */
11032         if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
11033
11034                 if (vm_map_lock_read_to_write(map)) {
11035                         vm_map_lock_read(map);
11036                         goto RetryLookup;
11037                 }
11038
11039                 VME_OBJECT_SET(entry,
11040                                vm_object_allocate(
11041                                        (vm_map_size_t)(entry->vme_end -
11042                                                        entry->vme_start)));
11043                 VME_OFFSET_SET(entry, 0);
11044                 vm_map_lock_write_to_read(map);
11045         }
11046
11047         /*
11048          *      Return the object/offset from this entry.  If the entry
11049          *      was copy-on-write or empty, it has been fixed up.  Also
11050          *      return the protection.
11051          */
11052
11053         *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
11054         *object = VME_OBJECT(entry);
11055         *out_prot = prot;
11056
11057         if (fault_info) {
11058                 fault_info->interruptible = THREAD_UNINT; /* for now... */
11059                 /* ... the caller will change "interruptible" if needed */
11060                 fault_info->cluster_size = 0;
11061                 fault_info->user_tag = VME_ALIAS(entry);
11062                 fault_info->pmap_options = 0;
11063                 if (entry->iokit_acct ||
11064                     (!entry->is_sub_map && !entry->use_pmap)) {
11065                         fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
11066                 }
11067                 fault_info->behavior = entry->behavior;
11068                 fault_info->lo_offset = VME_OFFSET(entry);
11069                 fault_info->hi_offset =
11070                         (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
11071                 fault_info->no_cache  = entry->no_cache;
11072                 fault_info->stealth = FALSE;
11073                 fault_info->io_sync = FALSE;
11074                 if (entry->used_for_jit ||
11075                     entry->vme_resilient_codesign) {
11076                         fault_info->cs_bypass = TRUE;
11077                 } else {
11078                         fault_info->cs_bypass = FALSE;
11079                 }
11080                 fault_info->mark_zf_absent = FALSE;
11081                 fault_info->batch_pmap_op = FALSE;
11082         }
11083
11084         /*
11085          *      Lock the object to prevent it from disappearing
11086          */
11087         if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
11088                 vm_object_lock(*object);
11089         else
11090                 vm_object_lock_shared(*object);
11091
11092         /*
11093          *      Save the version number
11094          */
11095
11096         out_version->main_timestamp = map->timestamp;
11097
11098         return KERN_SUCCESS;
11099 }
11100
11101
11102 /*
11103  *      vm_map_verify:
11104  *
11105  *      Take the map's read lock and check that its timestamp still
11106  *      matches "version".  On a match, return TRUE with the read lock
11107  *      held (vm_map_verify_done() drops it); otherwise unlock and
11108  *      return FALSE.
11109  */
11110 boolean_t
11111 vm_map_verify(
11112         register vm_map_t               map,
11113         register vm_map_version_t       *version)       /* REF */
11114 {
11115         vm_map_lock_read(map);
11116
11117         if (map->timestamp != version->main_timestamp) {
11118                 /* map changed since "version" was taken */
11119                 vm_map_unlock_read(map);
11120                 return FALSE;
11121         }
11122         return TRUE;
11123 }
11124
11125 /*
11126  *      vm_map_verify_done:
11127  *
11128  *      Releases locks acquired by a vm_map_verify.
11129  *
11130  *      This is now a macro in vm/vm_map.h.  It does a
11131  *      vm_map_unlock_read on the map.
11132  */
11133
11134
11135 /*
11136  *      TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
11137  *      Goes away after regular vm_region_recurse function migrates to
11138  *      64 bits
11139  *      vm_region_recurse: A form of vm_region which follows the
11140  *      submaps in a target map
11141  *
       *      Reports on the map entry that contains "*address" or, when
       *      nothing is mapped there, on the next entry found after it,
       *      descending through sub-map entries for at most
       *      "*nesting_depth" levels.
       *
       *      Parameters:
       *        map            top-level map to search.
       *        address        IN: address to look up.
       *                       OUT: start address of the reported region.
       *        size           OUT: size of the reported region.
       *        nesting_depth  IN: maximum sub-map depth to descend to.
       *                       OUT: depth at which the reported entry was found.
       *        submap_info    OUT: region info; used as a
       *                       vm_region_submap_short_info_64_t when "*count"
       *                       is below VM_REGION_SUBMAP_INFO_V0_COUNT_64.
       *        count          IN: info count supplied by the caller.
       *                       OUT: the SHORT, V0 or V1 info count that was
       *                       selected from the supplied count.
       *
       *      Returns:
       *        KERN_INVALID_ARGUMENT  "map" is VM_MAP_NULL or "*count" is below
       *                               VM_REGION_SUBMAP_SHORT_INFO_COUNT_64.
       *        KERN_INVALID_ADDRESS   no entry contains or follows "*address".
       *        KERN_SUCCESS           otherwise.
       *
       *      Every map lock/unlock in here is skipped when "not_in_kdp" is
       *      not set; in that case the page statistics are left at zero and
       *      the share mode at SM_EMPTY.
11142  */
11143
11144 kern_return_t
11145 vm_map_region_recurse_64(
11146         vm_map_t                 map,
11147         vm_map_offset_t *address,               /* IN/OUT */
11148         vm_map_size_t           *size,                  /* OUT */
11149         natural_t               *nesting_depth, /* IN/OUT */
11150         vm_region_submap_info_64_t      submap_info,    /* IN/OUT */
11151         mach_msg_type_number_t  *count) /* IN/OUT */
11152 {
11153         mach_msg_type_number_t  original_count;
11154         vm_region_extended_info_data_t  extended;
11155         vm_map_entry_t                  tmp_entry;
11156         vm_map_offset_t                 user_address;
11157         unsigned int                    user_max_depth;
11158
11159         /*
11160          * "curr_entry" is the VM map entry preceding or including the
11161          * address we're looking for.
11162          * "curr_map" is the map or sub-map containing "curr_entry".
11163          * "curr_address" is the equivalent of the top map's "user_address"
11164          * in the current map.
11165          * "curr_offset" is the cumulated offset of "curr_map" in the
11166          * target task's address space.
11167          * "curr_depth" is the depth of "curr_map" in the chain of
11168          * sub-maps.
11169          *
11170          * "curr_max_below" and "curr_max_above" limit the range (around
11171          * "curr_address") we should take into account in the current (sub)map.
11172          * They limit the range to what's visible through the map entries
11173          * we've traversed from the top map to the current map.
11174
11175          */
11176         vm_map_entry_t                  curr_entry;
11177         vm_map_address_t                curr_address;
11178         vm_map_offset_t                 curr_offset;
11179         vm_map_t                        curr_map;
11180         unsigned int                    curr_depth;
11181         vm_map_offset_t                 curr_max_below, curr_max_above;
11182         vm_map_offset_t                 curr_skip;
11183
11184         /*
11185          * "next_" is the same as "curr_" but for the VM region immediately
11186          * after the address we're looking for.  We need to keep track of this
11187          * too because we want to return info about that region if the
11188          * address we're looking for is not mapped.
11189          */
11190         vm_map_entry_t                  next_entry;
11191         vm_map_offset_t                 next_offset;
11192         vm_map_offset_t                 next_address;
11193         vm_map_t                        next_map;
11194         unsigned int                    next_depth;
11195         vm_map_offset_t                 next_max_below, next_max_above;
11196         vm_map_offset_t                 next_skip;
11197
11198         boolean_t                       look_for_pages;
11199         vm_region_submap_short_info_64_t short_info;
11200
11201         if (map == VM_MAP_NULL) {
11202                 /* no address space to work on */
11203                 return KERN_INVALID_ARGUMENT;
11204         }
11205
11206
11207         if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
11208                 /*
11209                  * "info" structure is not big enough and
11210                  * would overflow
11211                  */
11212                 return KERN_INVALID_ARGUMENT;
11213         }
11214
11215         original_count = *count;
11216
              /*
               * Pick the info layout from the caller's count: below the V0
               * count, the buffer is used as the short layout (through
               * "short_info", with "look_for_pages" cleared); otherwise it is
               * used as the full layout and "*count" is set to the V0 or V1
               * count.  Exactly one of "short_info" / "submap_info" is
               * non-NULL from here on.
               */
11217         if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
11218                 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
11219                 look_for_pages = FALSE;
11220                 short_info = (vm_region_submap_short_info_64_t) submap_info;
11221                 submap_info = NULL;
11222         } else {
11223                 look_for_pages = TRUE;
11224                 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
11225                 short_info = NULL;
11226
11227                 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
11228                         *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
11229                 }
11230         }
11231
11232         user_address = *address;
11233         user_max_depth = *nesting_depth;
11234
11235         if (not_in_kdp) {
11236                 vm_map_lock_read(map);
11237         }
11238
              /*
               * (Re)start the search from the top-level map at "user_address",
               * with no "next_" candidate recorded yet.
               */
11239 recurse_again:
11240         curr_entry = NULL;
11241         curr_map = map;
11242         curr_address = user_address;
11243         curr_offset = 0;
11244         curr_skip = 0;
11245         curr_depth = 0;
11246         curr_max_above = ((vm_map_offset_t) -1) - curr_address;
11247         curr_max_below = curr_address;
11248
11249         next_entry = NULL;
11250         next_map = NULL;
11251         next_address = 0;
11252         next_offset = 0;
11253         next_skip = 0;
11254         next_depth = 0;
11255         next_max_above = (vm_map_offset_t) -1;
11256         next_max_below = (vm_map_offset_t) -1;
11257
              /* one iteration per map level, starting at the top-level map */
11258         for (;;) {
11259                 if (vm_map_lookup_entry(curr_map,
11260                                         curr_address,
11261                                         &tmp_entry)) {
11262                         /* tmp_entry contains the address we're looking for */
11263                         curr_entry = tmp_entry;
11264                 } else {
11265                         vm_map_offset_t skip;
11266                         /*
11267                          * The address is not mapped.  "tmp_entry" is the
11268                          * map entry preceding the address.  We want the next
11269                          * one, if it exists.
11270                          */
11271                         curr_entry = tmp_entry->vme_next;
11272
11273                         if (curr_entry == vm_map_to_entry(curr_map) ||
11274                             (curr_entry->vme_start >=
11275                              curr_address + curr_max_above)) {
11276                                 /* no next entry at this level: stop looking */
11277                                 if (not_in_kdp) {
11278                                         vm_map_unlock_read(curr_map);
11279                                 }
11280                                 curr_entry = NULL;
11281                                 curr_map = NULL;
11282                                 curr_skip = 0;
11283                                 curr_offset = 0;
11284                                 curr_depth = 0;
11285                                 curr_max_above = 0;
11286                                 curr_max_below = 0;
11287                                 break;
11288                         }
11289
11290                         /* adjust current address and offset */
11291                         skip = curr_entry->vme_start - curr_address;
11292                         curr_address = curr_entry->vme_start;
11293                         curr_skip += skip;
11294                         curr_offset += skip;
11295                         curr_max_above -= skip;
11296                         curr_max_below = 0;
11297                 }
11298
11299                 /*
11300                  * Is the next entry at this level closer to the address (or
11301                  * deeper in the submap chain) than the one we had
11302                  * so far ?
11303                  */
11304                 tmp_entry = curr_entry->vme_next;
11305                 if (tmp_entry == vm_map_to_entry(curr_map)) {
11306                         /* no next entry at this level */
11307                 } else if (tmp_entry->vme_start >=
11308                            curr_address + curr_max_above) {
11309                         /*
11310                          * tmp_entry is beyond the scope of what we mapped of
11311                          * this submap in the upper level: ignore it.
11312                          */
11313                 } else if ((next_entry == NULL) ||
11314                            (tmp_entry->vme_start + curr_offset <=
11315                             next_entry->vme_start + next_offset)) {
11316                         /*
11317                          * We didn't have a "next_entry" or this one is
11318                          * closer to the address we're looking for:
11319                          * use this "tmp_entry" as the new "next_entry".
11320                          */
11321                         if (next_entry != NULL) {
11322                                 /* unlock the last "next_map" */
11323                                 if (next_map != curr_map && not_in_kdp) {
11324                                         vm_map_unlock_read(next_map);
11325                                 }
11326                         }
11327                         next_entry = tmp_entry;
11328                         next_map = curr_map;
11329                         next_depth = curr_depth;
11330                         next_address = next_entry->vme_start;
11331                         next_skip = curr_skip;
11332                         next_skip += (next_address - curr_address);
11333                         next_offset = curr_offset;
11334                         next_offset += (next_address - curr_address);
11335                         next_max_above = MIN(next_max_above, curr_max_above);
11336                         next_max_above = MIN(next_max_above,
11337                                              next_entry->vme_end - next_address);
11338                         next_max_below = MIN(next_max_below, curr_max_below);
11339                         next_max_below = MIN(next_max_below,
11340                                              next_address - next_entry->vme_start);
11341                 }
11342
11343                 /*
11344                  * "curr_max_{above,below}" allow us to keep track of the
11345                  * portion of the submap that is actually mapped at this level:
11346                  * the rest of that submap is irrelevant to us, since it's not
11347                  * mapped here.
11348                  * The relevant portion of the map starts at
11349                  * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
11350                  */
11351                 curr_max_above = MIN(curr_max_above,
11352                                      curr_entry->vme_end - curr_address);
11353                 curr_max_below = MIN(curr_max_below,
11354                                      curr_address - curr_entry->vme_start);
11355
11356                 if (!curr_entry->is_sub_map ||
11357                     curr_depth >= user_max_depth) {
11358                         /*
11359                          * We hit a leaf map or we reached the maximum depth
11360                          * we could, so stop looking.  Keep the current map
11361                          * locked.
11362                          */
11363                         break;
11364                 }
11365
11366                 /*
11367                  * Get down to the next submap level.
11368                  */
11369
11370                 /*
11371                  * Lock the next level and unlock the current level,
11372                  * unless we need to keep it locked to access the "next_entry"
11373                  * later.
11374                  */
11375                 if (not_in_kdp) {
11376                         vm_map_lock_read(VME_SUBMAP(curr_entry));
11377                 }
11378                 if (curr_map == next_map) {
11379                         /* keep "next_map" locked in case we need it */
11380                 } else {
11381                         /* release this map */
11382                         if (not_in_kdp)
11383                                 vm_map_unlock_read(curr_map);
11384                 }
11385
11386                 /*
11387                  * Adjust the offset.  "curr_entry" maps the submap
11388                  * at relative address "curr_entry->vme_start" in the
11389                  * curr_map but skips the first "VME_OFFSET(curr_entry)"
11390                  * bytes of the submap.
11391                  * "curr_offset" always represents the offset of a virtual
11392                  * address in the curr_map relative to the absolute address
11393                  * space (i.e. the top-level VM map).
11394                  */
11395                 curr_offset +=
11396                         (VME_OFFSET(curr_entry) - curr_entry->vme_start);
11397                 curr_address = user_address + curr_offset;
11398                 /* switch to the submap */
11399                 curr_map = VME_SUBMAP(curr_entry);
11400                 curr_depth++;
11401                 curr_entry = NULL;
11402         }
11403
11404         if (curr_entry == NULL) {
11405                 /* no VM region contains the address... */
11406                 if (next_entry == NULL) {
11407                         /* ... and no VM region follows it either */
11408                         return KERN_INVALID_ADDRESS;
11409                 }
11410                 /* ... gather info about the next VM region */
11411                 curr_entry = next_entry;
11412                 curr_map = next_map;    /* still locked ... */
11413                 curr_address = next_address;
11414                 curr_skip = next_skip;
11415                 curr_offset = next_offset;
11416                 curr_depth = next_depth;
11417                 curr_max_above = next_max_above;
11418                 curr_max_below = next_max_below;
11419         } else {
11420                 /* we won't need "next_entry" after all */
11421                 if (next_entry != NULL) {
11422                         /* release "next_map" */
11423                         if (next_map != curr_map && not_in_kdp) {
11424                                 vm_map_unlock_read(next_map);
11425                         }
11426                 }
11427         }
              /* the "next_" candidate has been consumed or dropped: forget it */
11428         next_entry = NULL;
11429         next_map = NULL;
11430         next_offset = 0;
11431         next_skip = 0;
11432         next_depth = 0;
11433         next_max_below = -1;
11434         next_max_above = -1;
11435
11436         if (curr_entry->is_sub_map &&
11437             curr_depth < user_max_depth) {
11438                 /*
11439                  * We're not as deep as we could be:  we must have
11440                  * gone back up after not finding anything mapped
11441                  * below the original top-level map entry's.
11442                  * Let's move "curr_address" forward and recurse again.
                       *
                       * NOTE(review): this path appears reachable only after
                       * falling back to "next_entry", so "curr_map" here is the
                       * former "next_map" and its read lock is still held, while
                       * "recurse_again" resets "curr_map" to "map" without any
                       * lock operation.  That looks balanced only if "next_map"
                       * is the top-level map at this point -- TODO confirm.
11443                  */
11444                 user_address = curr_address;
11445                 goto recurse_again;
11446         }
11447
              /* report where, and how deep, the selected entry was found */
11448         *nesting_depth = curr_depth;
11449         *size = curr_max_above + curr_max_below;
11450         *address = user_address + curr_skip - curr_max_below;
11451
11452 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
11453 // so probably should be a real 32b ID vs. ptr.
11454 // Current users just check for equality
11455 #define INFO_MAKE_OBJECT_ID(p)  ((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p))
11456
              /* copy the entry's own attributes into whichever layout is in use */
11457         if (look_for_pages) {
11458                 submap_info->user_tag = VME_ALIAS(curr_entry);
11459                 submap_info->offset = VME_OFFSET(curr_entry);
11460                 submap_info->protection = curr_entry->protection;
11461                 submap_info->inheritance = curr_entry->inheritance;
11462                 submap_info->max_protection = curr_entry->max_protection;
11463                 submap_info->behavior = curr_entry->behavior;
11464                 submap_info->user_wired_count = curr_entry->user_wired_count;
11465                 submap_info->is_submap = curr_entry->is_sub_map;
11466                 submap_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
11467         } else {
11468                 short_info->user_tag = VME_ALIAS(curr_entry);
11469                 short_info->offset = VME_OFFSET(curr_entry);
11470                 short_info->protection = curr_entry->protection;
11471                 short_info->inheritance = curr_entry->inheritance;
11472                 short_info->max_protection = curr_entry->max_protection;
11473                 short_info->behavior = curr_entry->behavior;
11474                 short_info->user_wired_count = curr_entry->user_wired_count;
11475                 short_info->is_submap = curr_entry->is_sub_map;
11476                 short_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
11477         }
11478
              /* defaults, reported as-is when nothing below fills them in */
11479         extended.pages_resident = 0;
11480         extended.pages_swapped_out = 0;
11481         extended.pages_shared_now_private = 0;
11482         extended.pages_dirtied = 0;
11483         extended.pages_reusable = 0;
11484         extended.external_pager = 0;
11485         extended.shadow_depth = 0;
11486         extended.share_mode = SM_EMPTY;
11487         extended.ref_count = 0;
11488
11489         if (not_in_kdp) {
11490                 if (!curr_entry->is_sub_map) {
11491                         vm_map_offset_t range_start, range_end;
                              /*
                               * Clamp the walked range to the part of
                               * "curr_entry" that lies within the
                               * "curr_max_below" / "curr_max_above" window.
                               */
11492                         range_start = MAX((curr_address - curr_max_below),
11493                                           curr_entry->vme_start);
11494                         range_end = MIN((curr_address + curr_max_above),
11495                                         curr_entry->vme_end);
11496                         vm_map_region_walk(curr_map,
11497                                            range_start,
11498                                            curr_entry,
11499                                            (VME_OFFSET(curr_entry) +
11500                                             (range_start -
11501                                              curr_entry->vme_start)),
11502                                            range_end - range_start,
11503                                            &extended,
11504                                            look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
11505                         if (extended.external_pager &&
11506                             extended.ref_count == 2 &&
11507                             extended.share_mode == SM_SHARED) {
11508                                 extended.share_mode = SM_PRIVATE;
11509                         }
11510                 } else {
                              /*
                               * Sub-map entry that was not descended into:
                               * derive the share mode from "use_pmap" and
                               * report the sub-map's own reference count.
                               */
11511                         if (curr_entry->use_pmap) {
11512                                 extended.share_mode = SM_TRUESHARED;
11513                         } else {
11514                                 extended.share_mode = SM_PRIVATE;
11515                         }
11516                         extended.ref_count = VME_SUBMAP(curr_entry)->ref_count;
11517                 }
11518         }
11519
              /* copy the collected statistics into whichever layout is in use */
11520         if (look_for_pages) {
11521                 submap_info->pages_resident = extended.pages_resident;
11522                 submap_info->pages_swapped_out = extended.pages_swapped_out;
11523                 submap_info->pages_shared_now_private =
11524                         extended.pages_shared_now_private;
11525                 submap_info->pages_dirtied = extended.pages_dirtied;
11526                 submap_info->external_pager = extended.external_pager;
11527                 submap_info->shadow_depth = extended.shadow_depth;
11528                 submap_info->share_mode = extended.share_mode;
11529                 submap_info->ref_count = extended.ref_count;
11530
                      /* "pages_reusable" is only written for V1-sized callers */
11531                 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
11532                         submap_info->pages_reusable = extended.pages_reusable;
11533                 }
11534         } else {
11535                 short_info->external_pager = extended.external_pager;
11536                 short_info->shadow_depth = extended.shadow_depth;
11537                 short_info->share_mode = extended.share_mode;
11538                 short_info->ref_count = extended.ref_count;
11539         }
11540
              /* drop the read lock on the map that holds "curr_entry" */
11541         if (not_in_kdp) {
11542                 vm_map_unlock_read(curr_map);
11543         }
11544
11545         return KERN_SUCCESS;
11546 }
11547
11548 /*
11549  *      vm_region:
11550  *
11551  *      User call to obtain information about a region in
11552  *      a task's address map. Currently, only one flavor is
11553  *      supported.
11554  *
11555  *      XXX The reserved and behavior fields cannot be filled
11556  *          in until the vm merge from the IK is completed, and
11557  *          vm_reserve is implemented.
11558  */
11559
11560 kern_return_t
11561 vm_map_region(
11562         vm_map_t                 map,
11563         vm_map_offset_t *address,               /* IN/OUT */
11564         vm_map_size_t           *size,                  /* OUT */
11565         vm_region_flavor_t       flavor,                /* IN */
11566         vm_region_info_t         info,                  /* OUT */
11567         mach_msg_type_number_t  *count, /* IN/OUT */
11568         mach_port_t             *object_name)           /* OUT */
11569 {
11570         vm_map_entry_t          tmp_entry;
11571         vm_map_entry_t          entry;
11572         vm_map_offset_t         start;
11573
11574         if (map == VM_MAP_NULL)
11575                 return(KERN_INVALID_ARGUMENT);
11576
11577         switch (flavor) {
11578
11579         case VM_REGION_BASIC_INFO:
11580                 /* legacy for old 32-bit objects info */
11581         {
11582                 vm_region_basic_info_t  basic;
11583
11584                 if (*count < VM_REGION_BASIC_INFO_COUNT)
11585                         return(KERN_INVALID_ARGUMENT);
11586
11587                 basic = (vm_region_basic_info_t) info;
11588                 *count = VM_REGION_BASIC_INFO_COUNT;
11589
11590                 vm_map_lock_read(map);
11591
11592                 start = *address;
11593                 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11594                         if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
11595                                 vm_map_unlock_read(map);
11596                                 return(KERN_INVALID_ADDRESS);
11597                         }
11598                 } else {
11599                         entry = tmp_entry;
11600                 }
11601
11602                 start = entry->vme_start;
11603
11604                 basic->offset = (uint32_t)VME_OFFSET(entry);
11605                 basic->protection = entry->protection;
11606                 basic->inheritance = entry->inheritance;
11607                 basic->max_protection = entry->max_protection;
11608                 basic->behavior = entry->behavior;
11609                 basic->user_wired_count = entry->user_wired_count;
11610                 basic->reserved = entry->is_sub_map;
11611                 *address = start;
11612                 *size = (entry->vme_end - start);
11613
11614                 if (object_name) *object_name = IP_NULL;
11615                 if (entry->is_sub_map) {
11616                         basic->shared = FALSE;
11617                 } else {
11618                         basic->shared = entry->is_shared;
11619                 }
11620
11621                 vm_map_unlock_read(map);
11622                 return(KERN_SUCCESS);
11623         }
11624
11625         case VM_REGION_BASIC_INFO_64:
11626         {
11627                 vm_region_basic_info_64_t       basic;
11628
11629                 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
11630                         return(KERN_INVALID_ARGUMENT);
11631
11632                 basic = (vm_region_basic_info_64_t) info;
11633                 *count = VM_REGION_BASIC_INFO_COUNT_64;
11634
11635                 vm_map_lock_read(map);
11636
11637                 start = *address;
11638                 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11639                         if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
11640                                 vm_map_unlock_read(map);
11641                                 return(KERN_INVALID_ADDRESS);
11642                         }
11643                 } else {
11644                         entry = tmp_entry;
11645                 }
11646
11647                 start = entry->vme_start;
11648
11649                 basic->offset = VME_OFFSET(entry);
11650                 basic->protection = entry->protection;
11651                 basic->inheritance = entry->inheritance;
11652                 basic->max_protection = entry->max_protection;
11653                 basic->behavior = entry->behavior;
11654                 basic->user_wired_count = entry->user_wired_count;
11655                 basic->reserved = entry->is_sub_map;
11656                 *address = start;
11657                 *size = (entry->vme_end - start);
11658
11659                 if (object_name) *object_name = IP_NULL;
11660                 if (entry->is_sub_map) {
11661                         basic->shared = FALSE;
11662                 } else {
11663                         basic->shared = entry->is_shared;
11664                 }
11665
11666                 vm_map_unlock_read(map);
11667                 return(KERN_SUCCESS);
11668         }
11669         case VM_REGION_EXTENDED_INFO:
11670                 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
11671                         return(KERN_INVALID_ARGUMENT);
11672                 /*fallthru*/
11673         case VM_REGION_EXTENDED_INFO__legacy:
11674                 if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy)
11675                         return KERN_INVALID_ARGUMENT;
11676
11677         {
11678                 vm_region_extended_info_t       extended;
11679                 mach_msg_type_number_t original_count;
11680
11681                 extended = (vm_region_extended_info_t) info;
11682
11683                 vm_map_lock_read(map);
11684
11685                 start = *address;
11686                 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11687                         if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
11688                                 vm_map_unlock_read(map);
11689                                 return(KERN_INVALID_ADDRESS);
11690                         }
11691                 } else {
11692                         entry = tmp_entry;
11693                 }
11694                 start = entry->vme_start;
11695
11696                 extended->protection = entry->protection;
11697                 extended->user_tag = VME_ALIAS(entry);
11698                 extended->pages_resident = 0;
11699                 extended->pages_swapped_out = 0;
11700                 extended->pages_shared_now_private = 0;
11701                 extended->pages_dirtied = 0;
11702                 extended->external_pager = 0;
11703                 extended->shadow_depth = 0;
11704
11705                 original_count = *count;
11706                 if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
11707                         *count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
11708                 } else {
11709                         extended->pages_reusable = 0;
11710                         *count = VM_REGION_EXTENDED_INFO_COUNT;
11711                 }
11712
11713                 vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);
11714
11715                 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
11716                         extended->share_mode = SM_PRIVATE;
11717
11718                 if (object_name)
11719                         *object_name = IP_NULL;
11720                 *address = start;
11721                 *size = (entry->vme_end - start);
11722
11723                 vm_map_unlock_read(map);
11724                 return(KERN_SUCCESS);
11725         }
11726         case VM_REGION_TOP_INFO:
11727         {
11728                 vm_region_top_info_t    top;
11729
11730                 if (*count < VM_REGION_TOP_INFO_COUNT)
11731                         return(KERN_INVALID_ARGUMENT);
11732
11733                 top = (vm_region_top_info_t) info;
11734                 *count = VM_REGION_TOP_INFO_COUNT;
11735
11736                 vm_map_lock_read(map);
11737
11738                 start = *address;
11739                 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11740                         if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
11741                                 vm_map_unlock_read(map);
11742                                 return(KERN_INVALID_ADDRESS);
11743                         }
11744                 } else {
11745                         entry = tmp_entry;
11746
11747                 }
11748                 start = entry->vme_start;
11749
11750                 top->private_pages_resident = 0;
11751                 top->shared_pages_resident = 0;
11752
11753                 vm_map_region_top_walk(entry, top);
11754
11755                 if (object_name)
11756                         *object_name = IP_NULL;
11757                 *address = start;
11758                 *size = (entry->vme_end - start);
11759
11760                 vm_map_unlock_read(map);
11761                 return(KERN_SUCCESS);
11762         }
11763         default:
11764                 return(KERN_INVALID_ARGUMENT);
11765         }
11766 }
11767
/*
 * OBJ_RESIDENT_COUNT(obj, entry_size):
 *
 * Page count to report for "obj", capped at "entry_size".
 * When the object is flagged "all_reusable" only its wired pages are
 * counted; otherwise the count is the resident pages minus the reusable
 * ones.  Both arguments are expanded more than once, so they must be free
 * of side effects.
 */
#define OBJ_RESIDENT_COUNT(obj, entry_size)				\
	MIN((entry_size),						\
	    (!(obj)->all_reusable ?					\
	     (obj)->resident_page_count - (obj)->reusable_page_count :	\
	     (obj)->wired_page_count))
11773
11774 void
11775 vm_map_region_top_walk(
11776         vm_map_entry_t             entry,
11777         vm_region_top_info_t       top)
11778 {
11779
11780         if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
11781                 top->share_mode = SM_EMPTY;
11782                 top->ref_count = 0;
11783                 top->obj_id = 0;
11784                 return;
11785         }
11786
11787         {
11788                 struct  vm_object *obj, *tmp_obj;
11789                 int             ref_count;
11790                 uint32_t        entry_size;
11791
11792                 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
11793
11794                 obj = VME_OBJECT(entry);
11795
11796                 vm_object_lock(obj);
11797
11798                 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
11799                         ref_count--;
11800
11801                 assert(obj->reusable_page_count <= obj->resident_page_count);
11802                 if (obj->shadow) {
11803                         if (ref_count == 1)
11804                                 top->private_pages_resident =
11805                                         OBJ_RESIDENT_COUNT(obj, entry_size);
11806                         else
11807                                 top->shared_pages_resident =
11808                                         OBJ_RESIDENT_COUNT(obj, entry_size);
11809                         top->ref_count  = ref_count;
11810                         top->share_mode = SM_COW;
11811
11812                         while ((tmp_obj = obj->shadow)) {
11813                                 vm_object_lock(tmp_obj);
11814                                 vm_object_unlock(obj);
11815                                 obj = tmp_obj;
11816
11817                                 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
11818                                         ref_count--;
11819
11820                                 assert(obj->reusable_page_count <= obj->resident_page_count);
11821                                 top->shared_pages_resident +=
11822                                         OBJ_RESIDENT_COUNT(obj, entry_size);
11823                                 top->ref_count += ref_count - 1;
11824                         }
11825                 } else {
11826                         if (entry->superpage_size) {
11827                                 top->share_mode = SM_LARGE_PAGE;
11828                                 top->shared_pages_resident = 0;
11829                                 top->private_pages_resident = entry_size;
11830                         } else if (entry->needs_copy) {
11831                                 top->share_mode = SM_COW;
11832                                 top->shared_pages_resident =
11833                                         OBJ_RESIDENT_COUNT(obj, entry_size);
11834                         } else {
11835                                 if (ref_count == 1 ||
11836                                     (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
11837                                         top->share_mode = SM_PRIVATE;
11838                                                 top->private_pages_resident =
11839                                                         OBJ_RESIDENT_COUNT(obj,
11840                                                                            entry_size);
11841                                 } else {
11842                                         top->share_mode = SM_SHARED;
11843                                         top->shared_pages_resident =
11844                                                 OBJ_RESIDENT_COUNT(obj,
11845                                                                   entry_size);
11846                                 }
11847                         }
11848                         top->ref_count = ref_count;
11849                 }
11850                 /* XXX K64: obj_id will be truncated */
11851                 top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
11852
11853                 vm_object_unlock(obj);
11854         }
11855 }
11856
11857 void
11858 vm_map_region_walk(
11859         vm_map_t                        map,
11860         vm_map_offset_t                 va,
11861         vm_map_entry_t                  entry,
11862         vm_object_offset_t              offset,
11863         vm_object_size_t                range,
11864         vm_region_extended_info_t       extended,
11865         boolean_t                       look_for_pages,
11866         mach_msg_type_number_t count)
11867 {
11868         register struct vm_object *obj, *tmp_obj;
11869         register vm_map_offset_t       last_offset;
11870         register int               i;
11871         register int               ref_count;
11872         struct vm_object        *shadow_object;
11873         int                     shadow_depth;
11874
11875         if ((VME_OBJECT(entry) == 0) ||
11876             (entry->is_sub_map) ||
11877             (VME_OBJECT(entry)->phys_contiguous &&
11878              !entry->superpage_size)) {
11879                 extended->share_mode = SM_EMPTY;
11880                 extended->ref_count = 0;
11881                 return;
11882         }
11883
11884         if (entry->superpage_size) {
11885                 extended->shadow_depth = 0;
11886                 extended->share_mode = SM_LARGE_PAGE;
11887                 extended->ref_count = 1;
11888                 extended->external_pager = 0;
11889                 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
11890                 extended->shadow_depth = 0;
11891                 return;
11892         }
11893
11894         {
11895                 obj = VME_OBJECT(entry);
11896
11897                 vm_object_lock(obj);
11898
11899                 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
11900                         ref_count--;
11901
11902                 if (look_for_pages) {
11903                         for (last_offset = offset + range;
11904                              offset < last_offset;
11905                              offset += PAGE_SIZE_64, va += PAGE_SIZE) {
11906                                         vm_map_region_look_for_page(map, va, obj,
11907                                                                     offset, ref_count,
11908                                                                     0, extended, count);
11909                         }
11910                 } else {
11911                         shadow_object = obj->shadow;
11912                         shadow_depth = 0;
11913
11914                         if ( !(obj->pager_trusted) && !(obj->internal))
11915                                 extended->external_pager = 1;
11916
11917                         if (shadow_object != VM_OBJECT_NULL) {
11918                                 vm_object_lock(shadow_object);
11919                                 for (;
11920                                      shadow_object != VM_OBJECT_NULL;
11921                                      shadow_depth++) {
11922                                         vm_object_t     next_shadow;
11923
11924                                         if ( !(shadow_object->pager_trusted) &&
11925                                              !(shadow_object->internal))
11926                                                 extended->external_pager = 1;
11927
11928                                         next_shadow = shadow_object->shadow;
11929                                         if (next_shadow) {
11930                                                 vm_object_lock(next_shadow);
11931                                         }
11932                                         vm_object_unlock(shadow_object);
11933                                         shadow_object = next_shadow;
11934                                 }
11935                         }
11936                         extended->shadow_depth = shadow_depth;
11937                 }
11938
11939                 if (extended->shadow_depth || entry->needs_copy)
11940                         extended->share_mode = SM_COW;
11941                 else {
11942                         if (ref_count == 1)
11943                                 extended->share_mode = SM_PRIVATE;
11944                         else {
11945                                 if (obj->true_share)
11946                                         extended->share_mode = SM_TRUESHARED;
11947                                 else
11948                                         extended->share_mode = SM_SHARED;
11949                         }
11950                 }
11951                 extended->ref_count = ref_count - extended->shadow_depth;
11952
11953                 for (i = 0; i < extended->shadow_depth; i++) {
11954                         if ((tmp_obj = obj->shadow) == 0)
11955                                 break;
11956                         vm_object_lock(tmp_obj);
11957                         vm_object_unlock(obj);
11958
11959                         if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
11960                                 ref_count--;
11961
11962                         extended->ref_count += ref_count;
11963                         obj = tmp_obj;
11964                 }
11965                 vm_object_unlock(obj);
11966
11967                 if (extended->share_mode == SM_SHARED) {
11968                         register vm_map_entry_t      cur;
11969                         register vm_map_entry_t      last;
11970                         int      my_refs;
11971
11972                         obj = VME_OBJECT(entry);
11973                         last = vm_map_to_entry(map);
11974                         my_refs = 0;
11975
11976                         if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
11977                                 ref_count--;
11978                         for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
11979                                 my_refs += vm_map_region_count_obj_refs(cur, obj);
11980
11981                         if (my_refs == ref_count)
11982                                 extended->share_mode = SM_PRIVATE_ALIASED;
11983                         else if (my_refs > 1)
11984                                 extended->share_mode = SM_SHARED_ALIASED;
11985                 }
11986         }
11987 }
11988
11989
11990 /* object is locked on entry and locked on return */
11991
11992
11993 static void
11994 vm_map_region_look_for_page(
11995         __unused vm_map_t               map,
11996         __unused vm_map_offset_t        va,
11997         vm_object_t                     object,
11998         vm_object_offset_t              offset,
11999         int                             max_refcnt,
12000         int                             depth,
12001         vm_region_extended_info_t       extended,
12002         mach_msg_type_number_t count)
12003 {
12004         register vm_page_t      p;
12005         register vm_object_t    shadow;
12006         register int            ref_count;
12007         vm_object_t             caller_object;
12008         kern_return_t           kr;
12009         shadow = object->shadow;
12010         caller_object = object;
12011
12012
12013         while (TRUE) {
12014
12015                 if ( !(object->pager_trusted) && !(object->internal))
12016                         extended->external_pager = 1;
12017
12018                 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
12019                         if (shadow && (max_refcnt == 1))
12020                                 extended->pages_shared_now_private++;
12021
12022                         if (!p->fictitious &&
12023                             (p->dirty || pmap_is_modified(p->phys_page)))
12024                                 extended->pages_dirtied++;
12025                         else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
12026                                 if (p->reusable || p->object->all_reusable) {
12027                                         extended->pages_reusable++;
12028                                 }
12029                         }
12030
12031                         extended->pages_resident++;
12032
12033                         if(object != caller_object)
12034                                 vm_object_unlock(object);
12035
12036                         return;
12037                 }
12038 #if     MACH_PAGEMAP
12039                 if (object->existence_map) {
12040                         if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) {
12041
12042                                 extended->pages_swapped_out++;
12043
12044                                 if(object != caller_object)
12045                                         vm_object_unlock(object);
12046
12047                                 return;
12048                         }
12049                 } else
12050 #endif /* MACH_PAGEMAP */
12051                 if (object->internal &&
12052                     object->alive &&
12053                     !object->terminating &&
12054                     object->pager_ready) {
12055
12056                         if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
12057                                 if (VM_COMPRESSOR_PAGER_STATE_GET(object,
12058                                                                   offset)
12059                                     == VM_EXTERNAL_STATE_EXISTS) {
12060                                         /* the pager has that page */
12061                                         extended->pages_swapped_out++;
12062                                         if (object != caller_object)
12063                                                 vm_object_unlock(object);
12064                                         return;
12065                                 }
12066                         } else {
12067                                 memory_object_t pager;
12068
12069                                 vm_object_paging_begin(object);
12070                                 pager = object->pager;
12071                                 vm_object_unlock(object);
12072
12073                                 kr = memory_object_data_request(
12074                                         pager,
12075                                         offset + object->paging_offset,
12076                                         0, /* just poke the pager */
12077                                         VM_PROT_READ,
12078                                         NULL);
12079
12080                                 vm_object_lock(object);
12081                                 vm_object_paging_end(object);
12082
12083                                 if (kr == KERN_SUCCESS) {
12084                                         /* the pager has that page */
12085                                         extended->pages_swapped_out++;
12086                                         if (object != caller_object)
12087                                                 vm_object_unlock(object);
12088                                         return;
12089                                 }
12090                         }
12091                 }
12092
12093                 if (shadow) {
12094                         vm_object_lock(shadow);
12095
12096                         if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
12097                                 ref_count--;
12098
12099                         if (++depth > extended->shadow_depth)
12100                                 extended->shadow_depth = depth;
12101
12102                         if (ref_count > max_refcnt)
12103                                 max_refcnt = ref_count;
12104
12105                         if(object != caller_object)
12106                                 vm_object_unlock(object);
12107
12108                         offset = offset + object->vo_shadow_offset;
12109                         object = shadow;
12110                         shadow = object->shadow;
12111                         continue;
12112                 }
12113                 if(object != caller_object)
12114                         vm_object_unlock(object);
12115                 break;
12116         }
12117 }
12118
12119 static int
12120 vm_map_region_count_obj_refs(
12121         vm_map_entry_t    entry,
12122         vm_object_t       object)
12123 {
12124         register int ref_count;
12125         register vm_object_t chk_obj;
12126         register vm_object_t tmp_obj;
12127
12128         if (VME_OBJECT(entry) == 0)
12129                 return(0);
12130
12131         if (entry->is_sub_map)
12132                 return(0);
12133         else {
12134                 ref_count = 0;
12135
12136                 chk_obj = VME_OBJECT(entry);
12137                 vm_object_lock(chk_obj);
12138
12139                 while (chk_obj) {
12140                         if (chk_obj == object)
12141                                 ref_count++;
12142                         tmp_obj = chk_obj->shadow;
12143                         if (tmp_obj)
12144                                 vm_object_lock(tmp_obj);
12145                         vm_object_unlock(chk_obj);
12146
12147                         chk_obj = tmp_obj;
12148                 }
12149         }
12150         return(ref_count);
12151 }
12152
12153
12154 /*
12155  *      Routine:        vm_map_simplify
12156  *
12157  *      Description:
12158  *              Attempt to simplify the map representation in
12159  *              the vicinity of the given starting address.
12160  *      Note:
12161  *              This routine is intended primarily to keep the
12162  *              kernel maps more compact -- they generally don't
12163  *              benefit from the "expand a map entry" technology
12164  *              at allocation time because the adjacent entry
12165  *              is often wired down.
12166  */
12167 void
12168 vm_map_simplify_entry(
12169         vm_map_t        map,
12170         vm_map_entry_t  this_entry)
12171 {
12172         vm_map_entry_t  prev_entry;
12173
12174         counter(c_vm_map_simplify_entry_called++);
12175
12176         prev_entry = this_entry->vme_prev;
12177
12178         if ((this_entry != vm_map_to_entry(map)) &&
12179             (prev_entry != vm_map_to_entry(map)) &&
12180
12181             (prev_entry->vme_end == this_entry->vme_start) &&
12182
12183             (prev_entry->is_sub_map == this_entry->is_sub_map) &&
12184             (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
12185             ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
12186                                     prev_entry->vme_start))
12187              == VME_OFFSET(this_entry)) &&
12188
12189             (prev_entry->behavior == this_entry->behavior) &&
12190             (prev_entry->needs_copy == this_entry->needs_copy) &&
12191             (prev_entry->protection == this_entry->protection) &&
12192             (prev_entry->max_protection == this_entry->max_protection) &&
12193             (prev_entry->inheritance == this_entry->inheritance) &&
12194             (prev_entry->use_pmap == this_entry->use_pmap) &&
12195             (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
12196             (prev_entry->no_cache == this_entry->no_cache) &&
12197             (prev_entry->permanent == this_entry->permanent) &&
12198             (prev_entry->map_aligned == this_entry->map_aligned) &&
12199             (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
12200             (prev_entry->used_for_jit == this_entry->used_for_jit) &&
12201             /* from_reserved_zone: OK if that field doesn't match */
12202             (prev_entry->iokit_acct == this_entry->iokit_acct) &&
12203             (prev_entry->vme_resilient_codesign ==
12204              this_entry->vme_resilient_codesign) &&
12205             (prev_entry->vme_resilient_media ==
12206              this_entry->vme_resilient_media) &&
12207
12208             (prev_entry->wired_count == this_entry->wired_count) &&
12209             (prev_entry->user_wired_count == this_entry->user_wired_count) &&
12210
12211             (prev_entry->in_transition == FALSE) &&
12212             (this_entry->in_transition == FALSE) &&
12213             (prev_entry->needs_wakeup == FALSE) &&
12214             (this_entry->needs_wakeup == FALSE) &&
12215             (prev_entry->is_shared == FALSE) &&
12216             (this_entry->is_shared == FALSE) &&
12217             (prev_entry->superpage_size == FALSE) &&
12218             (this_entry->superpage_size == FALSE)
12219                 ) {
12220                 vm_map_store_entry_unlink(map, prev_entry);
12221                 assert(prev_entry->vme_start < this_entry->vme_end);
12222                 if (prev_entry->map_aligned)
12223                         assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
12224                                                    VM_MAP_PAGE_MASK(map)));
12225                 this_entry->vme_start = prev_entry->vme_start;
12226                 VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));
12227
12228                 if (map->holelistenabled) {
12229                         vm_map_store_update_first_free(map, this_entry, TRUE);
12230                 }
12231
12232                 if (prev_entry->is_sub_map) {
12233                         vm_map_deallocate(VME_SUBMAP(prev_entry));
12234                 } else {
12235                         vm_object_deallocate(VME_OBJECT(prev_entry));
12236                 }
12237                 vm_map_entry_dispose(map, prev_entry);
12238                 SAVE_HINT_MAP_WRITE(map, this_entry);
12239                 counter(c_vm_map_simplified++);
12240         }
12241 }
12242
12243 void
12244 vm_map_simplify(
12245         vm_map_t        map,
12246         vm_map_offset_t start)
12247 {
12248         vm_map_entry_t  this_entry;
12249
12250         vm_map_lock(map);
12251         if (vm_map_lookup_entry(map, start, &this_entry)) {
12252                 vm_map_simplify_entry(map, this_entry);
12253                 vm_map_simplify_entry(map, this_entry->vme_next);
12254         }
12255         counter(c_vm_map_simplify_called++);
12256         vm_map_unlock(map);
12257 }
12258
12259 static void
12260 vm_map_simplify_range(
12261         vm_map_t        map,
12262         vm_map_offset_t start,
12263         vm_map_offset_t end)
12264 {
12265         vm_map_entry_t  entry;
12266
12267         /*
12268          * The map should be locked (for "write") by the caller.
12269          */
12270
12271         if (start >= end) {
12272                 /* invalid address range */
12273                 return;
12274         }
12275
12276         start = vm_map_trunc_page(start,
12277                                   VM_MAP_PAGE_MASK(map));
12278         end = vm_map_round_page(end,
12279                                 VM_MAP_PAGE_MASK(map));
12280
12281         if (!vm_map_lookup_entry(map, start, &entry)) {
12282                 /* "start" is not mapped and "entry" ends before "start" */
12283                 if (entry == vm_map_to_entry(map)) {
12284                         /* start with first entry in the map */
12285                         entry = vm_map_first_entry(map);
12286                 } else {
12287                         /* start with next entry */
12288                         entry = entry->vme_next;
12289                 }
12290         }
12291
12292         while (entry != vm_map_to_entry(map) &&
12293                entry->vme_start <= end) {
12294                 /* try and coalesce "entry" with its previous entry */
12295                 vm_map_simplify_entry(map, entry);
12296                 entry = entry->vme_next;
12297         }
12298 }
12299
12300
12301 /*
12302  *      Routine:        vm_map_machine_attribute
12303  *      Purpose:
12304  *              Provide machine-specific attributes to mappings,
12305  *              such as cachability etc. for machines that provide
12306  *              them.  NUMA architectures and machines with big/strange
12307  *              caches will use this.
12308  *      Note:
12309  *              Responsibilities for locking and checking are handled here,
12310  *              everything else in the pmap module. If any non-volatile
12311  *              information must be kept, the pmap module should handle
12312  *              it itself. [This assumes that attributes do not
12313  *              need to be inherited, which seems ok to me]
12314  */
12315 kern_return_t
12316 vm_map_machine_attribute(
12317         vm_map_t                        map,
12318         vm_map_offset_t         start,
12319         vm_map_offset_t         end,
12320         vm_machine_attribute_t  attribute,
12321         vm_machine_attribute_val_t* value)              /* IN/OUT */
12322 {
12323         kern_return_t   ret;
12324         vm_map_size_t sync_size;
12325         vm_map_entry_t entry;
12326
12327         if (start < vm_map_min(map) || end > vm_map_max(map))
12328                 return KERN_INVALID_ADDRESS;
12329
12330         /* Figure how much memory we need to flush (in page increments) */
12331         sync_size = end - start;
12332
12333         vm_map_lock(map);
12334
12335         if (attribute != MATTR_CACHE) {
12336                 /* If we don't have to find physical addresses, we */
12337                 /* don't have to do an explicit traversal here.    */
12338                 ret = pmap_attribute(map->pmap, start, end-start,
12339                                      attribute, value);
12340                 vm_map_unlock(map);
12341                 return ret;
12342         }
12343
12344         ret = KERN_SUCCESS;                                                                             /* Assume it all worked */
12345
12346         while(sync_size) {
12347                 if (vm_map_lookup_entry(map, start, &entry)) {
12348                         vm_map_size_t   sub_size;
12349                         if((entry->vme_end - start) > sync_size) {
12350                                 sub_size = sync_size;
12351                                 sync_size = 0;
12352                         } else {
12353                                 sub_size = entry->vme_end - start;
12354                                 sync_size -= sub_size;
12355                         }
12356                         if(entry->is_sub_map) {
12357                                 vm_map_offset_t sub_start;
12358                                 vm_map_offset_t sub_end;
12359
12360                                 sub_start = (start - entry->vme_start)
12361                                         + VME_OFFSET(entry);
12362                                 sub_end = sub_start + sub_size;
12363                                 vm_map_machine_attribute(
12364                                         VME_SUBMAP(entry),
12365                                         sub_start,
12366                                         sub_end,
12367                                         attribute, value);
12368                         } else {
12369                                 if (VME_OBJECT(entry)) {
12370                                         vm_page_t               m;
12371                                         vm_object_t             object;
12372                                         vm_object_t             base_object;
12373                                         vm_object_t             last_object;
12374                                         vm_object_offset_t      offset;
12375                                         vm_object_offset_t      base_offset;
12376                                         vm_map_size_t           range;
12377                                         range = sub_size;
12378                                         offset = (start - entry->vme_start)
12379                                                 + VME_OFFSET(entry);
12380                                         base_offset = offset;
12381                                         object = VME_OBJECT(entry);
12382                                         base_object = object;
12383                                         last_object = NULL;
12384
12385                                         vm_object_lock(object);
12386
12387                                         while (range) {
12388                                                 m = vm_page_lookup(
12389                                                         object, offset);
12390
12391                                                 if (m && !m->fictitious) {
12392                                                         ret =
12393                                                                 pmap_attribute_cache_sync(
12394                                                                         m->phys_page,
12395                                                                         PAGE_SIZE,
12396                                                                         attribute, value);
12397
12398                                                 } else if (object->shadow) {
12399                                                         offset = offset + object->vo_shadow_offset;
12400                                                         last_object = object;
12401                                                         object = object->shadow;
12402                                                         vm_object_lock(last_object->shadow);
12403                                                         vm_object_unlock(last_object);
12404                                                         continue;
12405                                                 }
12406                                                 range -= PAGE_SIZE;
12407
12408                                                 if (base_object != object) {
12409                                                         vm_object_unlock(object);
12410                                                         vm_object_lock(base_object);
12411                                                         object = base_object;
12412                                                 }
12413                                                 /* Bump to the next page */
12414                                                 base_offset += PAGE_SIZE;
12415                                                 offset = base_offset;
12416                                         }
12417                                         vm_object_unlock(object);
12418                                 }
12419                         }
12420                         start += sub_size;
12421                 } else {
12422                         vm_map_unlock(map);
12423                         return KERN_FAILURE;
12424                 }
12425
12426         }
12427
12428         vm_map_unlock(map);
12429
12430         return ret;
12431 }
12432
12433 /*
12434  *      vm_map_behavior_set:
12435  *
12436  *      Sets the paging reference behavior of the specified address
12437  *      range in the target map.  Paging reference behavior affects
12438  *      how pagein operations resulting from faults on the map will be
12439  *      clustered.
12440  */
12441 kern_return_t
12442 vm_map_behavior_set(
12443         vm_map_t        map,
12444         vm_map_offset_t start,
12445         vm_map_offset_t end,
12446         vm_behavior_t   new_behavior)
12447 {
12448         register vm_map_entry_t entry;
12449         vm_map_entry_t  temp_entry;
12450
12451         XPR(XPR_VM_MAP,
12452             "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
12453             map, start, end, new_behavior, 0);
12454
12455         if (start > end ||
12456             start < vm_map_min(map) ||
12457             end > vm_map_max(map)) {
12458                 return KERN_NO_SPACE;
12459         }
12460
12461         switch (new_behavior) {
12462
12463         /*
12464          * This first block of behaviors all set a persistent state on the specified
12465          * memory range.  All we have to do here is to record the desired behavior
12466          * in the vm_map_entry_t's.
12467          */
12468
12469         case VM_BEHAVIOR_DEFAULT:
12470         case VM_BEHAVIOR_RANDOM:
12471         case VM_BEHAVIOR_SEQUENTIAL:
12472         case VM_BEHAVIOR_RSEQNTL:
12473         case VM_BEHAVIOR_ZERO_WIRED_PAGES:
12474                 vm_map_lock(map);
12475
12476                 /*
12477                  *      The entire address range must be valid for the map.
12478                  *      Note that vm_map_range_check() does a
12479                  *      vm_map_lookup_entry() internally and returns the
12480                  *      entry containing the start of the address range if
12481                  *      the entire range is valid.
12482                  */
12483                 if (vm_map_range_check(map, start, end, &temp_entry)) {
12484                         entry = temp_entry;
12485                         vm_map_clip_start(map, entry, start);
12486                 }
12487                 else {
12488                         vm_map_unlock(map);
12489                         return(KERN_INVALID_ADDRESS);
12490                 }
12491
12492                 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
12493                         vm_map_clip_end(map, entry, end);
12494                         if (entry->is_sub_map) {
12495                                 assert(!entry->use_pmap);
12496                         }
12497
12498                         if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
12499                                 entry->zero_wired_pages = TRUE;
12500                         } else {
12501                                 entry->behavior = new_behavior;
12502                         }
12503                         entry = entry->vme_next;
12504                 }
12505
12506                 vm_map_unlock(map);
12507                 break;
12508
12509         /*
12510          * The rest of these are different from the above in that they cause
12511          * an immediate action to take place as opposed to setting a behavior that
12512          * affects future actions.
12513          */
12514
12515         case VM_BEHAVIOR_WILLNEED:
12516                 return vm_map_willneed(map, start, end);
12517
12518         case VM_BEHAVIOR_DONTNEED:
12519                 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
12520
12521         case VM_BEHAVIOR_FREE:
12522                 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
12523
12524         case VM_BEHAVIOR_REUSABLE:
12525                 return vm_map_reusable_pages(map, start, end);
12526
12527         case VM_BEHAVIOR_REUSE:
12528                 return vm_map_reuse_pages(map, start, end);
12529
12530         case VM_BEHAVIOR_CAN_REUSE:
12531                 return vm_map_can_reuse(map, start, end);
12532
12533 #if MACH_ASSERT
12534         case VM_BEHAVIOR_PAGEOUT:
12535                 return vm_map_pageout(map, start, end);
12536 #endif /* MACH_ASSERT */
12537
12538         default:
12539                 return(KERN_INVALID_ARGUMENT);
12540         }
12541
12542         return(KERN_SUCCESS);
12543 }
12544
12545
12546 /*
12547  * Internals for madvise(MADV_WILLNEED) system call.
12548  *
12549  * The present implementation is to do a read-ahead if the mapping corresponds
12550  * to a mapped regular file.  If it's an anonymous mapping, then we do nothing
12551  * and basically ignore the "advice" (which we are always free to do).
12552  */
12553
12554
12555 static kern_return_t
12556 vm_map_willneed(
12557         vm_map_t        map,
12558         vm_map_offset_t start,
12559         vm_map_offset_t end
12560 )
12561 {
12562         vm_map_entry_t                  entry;
12563         vm_object_t                     object;
12564         memory_object_t                 pager;
12565         struct vm_object_fault_info     fault_info;
12566         kern_return_t                   kr;
12567         vm_object_size_t                len;
12568         vm_object_offset_t              offset;
12569
12570         /*
12571          * Fill in static values in fault_info.  Several fields get ignored by the code
12572          * we call, but we'll fill them in anyway since uninitialized fields are bad
12573          * when it comes to future backwards compatibility.
12574          */
12575
12576         fault_info.interruptible = THREAD_UNINT;                /* ignored value */
12577         fault_info.behavior      = VM_BEHAVIOR_SEQUENTIAL;
12578         fault_info.no_cache      = FALSE;                       /* ignored value */
12579         fault_info.stealth       = TRUE;
12580         fault_info.io_sync = FALSE;
12581         fault_info.cs_bypass = FALSE;
12582         fault_info.mark_zf_absent = FALSE;
12583         fault_info.batch_pmap_op = FALSE;
12584
12585         /*
12586          * The MADV_WILLNEED operation doesn't require any changes to the
12587          * vm_map_entry_t's, so the read lock is sufficient.
12588          */
12589
12590         vm_map_lock_read(map);
12591
12592         /*
12593          * The madvise semantics require that the address range be fully
12594          * allocated with no holes.  Otherwise, we're required to return
12595          * an error.
12596          */
12597
12598         if (! vm_map_range_check(map, start, end, &entry)) {
12599                 vm_map_unlock_read(map);
12600                 return KERN_INVALID_ADDRESS;
12601         }
12602
12603         /*
12604          * Examine each vm_map_entry_t in the range.
12605          */
12606         for (; entry != vm_map_to_entry(map) && start < end; ) {
12607
12608                 /*
12609                  * The first time through, the start address could be anywhere
12610                  * within the vm_map_entry we found.  So adjust the offset to
12611                  * correspond.  After that, the offset will always be zero to
12612                  * correspond to the beginning of the current vm_map_entry.
12613                  */
12614                 offset = (start - entry->vme_start) + VME_OFFSET(entry);
12615
12616                 /*
12617                  * Set the length so we don't go beyond the end of the
12618                  * map_entry or beyond the end of the range we were given.
12619                  * This range could span also multiple map entries all of which
12620                  * map different files, so make sure we only do the right amount
12621                  * of I/O for each object.  Note that it's possible for there
12622                  * to be multiple map entries all referring to the same object
12623                  * but with different page permissions, but it's not worth
12624                  * trying to optimize that case.
12625                  */
12626                 len = MIN(entry->vme_end - start, end - start);
12627
12628                 if ((vm_size_t) len != len) {
12629                         /* 32-bit overflow */
12630                         len = (vm_size_t) (0 - PAGE_SIZE);
12631                 }
12632                 fault_info.cluster_size = (vm_size_t) len;
12633                 fault_info.lo_offset    = offset;
12634                 fault_info.hi_offset    = offset + len;
12635                 fault_info.user_tag     = VME_ALIAS(entry);
12636                 fault_info.pmap_options = 0;
12637                 if (entry->iokit_acct ||
12638                     (!entry->is_sub_map && !entry->use_pmap)) {
12639                         fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
12640                 }
12641
12642                 /*
12643                  * If there's no read permission to this mapping, then just
12644                  * skip it.
12645                  */
12646                 if ((entry->protection & VM_PROT_READ) == 0) {
12647                         entry = entry->vme_next;
12648                         start = entry->vme_start;
12649                         continue;
12650                 }
12651
12652                 /*
12653                  * Find the file object backing this map entry.  If there is
12654                  * none, then we simply ignore the "will need" advice for this
12655                  * entry and go on to the next one.
12656                  */
12657                 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
12658                         entry = entry->vme_next;
12659                         start = entry->vme_start;
12660                         continue;
12661                 }
12662
12663                 /*
12664                  * The data_request() could take a long time, so let's
12665                  * release the map lock to avoid blocking other threads.
12666                  */
12667                 vm_map_unlock_read(map);
12668
12669                 vm_object_paging_begin(object);
12670                 pager = object->pager;
12671                 vm_object_unlock(object);
12672
12673                 /*
12674                  * Get the data from the object asynchronously.
12675                  *
12676                  * Note that memory_object_data_request() places limits on the
12677                  * amount of I/O it will do.  Regardless of the len we
12678                  * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
12679                  * silently truncates the len to that size.  This isn't
12680                  * necessarily bad since madvise shouldn't really be used to
12681                  * page in unlimited amounts of data.  Other Unix variants
12682                  * limit the willneed case as well.  If this turns out to be an
12683                  * issue for developers, then we can always adjust the policy
12684                  * here and still be backwards compatible since this is all
12685                  * just "advice".
12686                  */
12687                 kr = memory_object_data_request(
12688                         pager,
12689                         offset + object->paging_offset,
12690                         0,      /* ignored */
12691                         VM_PROT_READ,
12692                         (memory_object_fault_info_t)&fault_info);
12693
12694                 vm_object_lock(object);
12695                 vm_object_paging_end(object);
12696                 vm_object_unlock(object);
12697
12698                 /*
12699                  * If we couldn't do the I/O for some reason, just give up on
12700                  * the madvise.  We still return success to the user since
12701                  * madvise isn't supposed to fail when the advice can't be
12702                  * taken.
12703                  */
12704                 if (kr != KERN_SUCCESS) {
12705                         return KERN_SUCCESS;
12706                 }
12707
12708                 start += len;
12709                 if (start >= end) {
12710                         /* done */
12711                         return KERN_SUCCESS;
12712                 }
12713
12714                 /* look up next entry */
12715                 vm_map_lock_read(map);
12716                 if (! vm_map_lookup_entry(map, start, &entry)) {
12717                         /*
12718                          * There's a new hole in the address range.
12719                          */
12720                         vm_map_unlock_read(map);
12721                         return KERN_INVALID_ADDRESS;
12722                 }
12723         }
12724
12725         vm_map_unlock_read(map);
12726         return KERN_SUCCESS;
12727 }
12728
12729 static boolean_t
12730 vm_map_entry_is_reusable(
12731         vm_map_entry_t entry)
12732 {
12733         /* Only user map entries */
12734
12735         vm_object_t object;
12736
12737         if (entry->is_sub_map) {
12738                 return FALSE;
12739         }
12740
12741         switch (VME_ALIAS(entry)) {
12742         case VM_MEMORY_MALLOC:
12743         case VM_MEMORY_MALLOC_SMALL:
12744         case VM_MEMORY_MALLOC_LARGE:
12745         case VM_MEMORY_REALLOC:
12746         case VM_MEMORY_MALLOC_TINY:
12747         case VM_MEMORY_MALLOC_LARGE_REUSABLE:
12748         case VM_MEMORY_MALLOC_LARGE_REUSED:
12749                 /*
12750                  * This is a malloc() memory region: check if it's still
12751                  * in its original state and can be re-used for more
12752                  * malloc() allocations.
12753                  */
12754                 break;
12755         default:
12756                 /*
12757                  * Not a malloc() memory region: let the caller decide if
12758                  * it's re-usable.
12759                  */
12760                 return TRUE;
12761         }
12762
12763         if (entry->is_shared ||
12764             entry->is_sub_map ||
12765             entry->in_transition ||
12766             entry->protection != VM_PROT_DEFAULT ||
12767             entry->max_protection != VM_PROT_ALL ||
12768             entry->inheritance != VM_INHERIT_DEFAULT ||
12769             entry->no_cache ||
12770             entry->permanent ||
12771             entry->superpage_size != FALSE ||
12772             entry->zero_wired_pages ||
12773             entry->wired_count != 0 ||
12774             entry->user_wired_count != 0) {
12775                 return FALSE;
12776         }
12777
12778         object = VME_OBJECT(entry);
12779         if (object == VM_OBJECT_NULL) {
12780                 return TRUE;
12781         }
12782         if (
12783 #if 0
12784                 /*
12785                  * Let's proceed even if the VM object is potentially
12786                  * shared.
12787                  * We check for this later when processing the actual
12788                  * VM pages, so the contents will be safe if shared.
12789                  *
12790                  * But we can still mark this memory region as "reusable" to
12791                  * acknowledge that the caller did let us know that the memory
12792                  * could be re-used and should not be penalized for holding
12793                  * on to it.  This allows its "resident size" to not include
12794                  * the reusable range.
12795                  */
12796             object->ref_count == 1 &&
12797 #endif
12798             object->wired_page_count == 0 &&
12799             object->copy == VM_OBJECT_NULL &&
12800             object->shadow == VM_OBJECT_NULL &&
12801             object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
12802             object->internal &&
12803             !object->true_share &&
12804             object->wimg_bits == VM_WIMG_USE_DEFAULT &&
12805             !object->code_signed) {
12806                 return TRUE;
12807         }
12808         return FALSE;
12809
12810
12811 }
12812
12813 static kern_return_t
12814 vm_map_reuse_pages(
12815         vm_map_t        map,
12816         vm_map_offset_t start,
12817         vm_map_offset_t end)
12818 {
12819         vm_map_entry_t                  entry;
12820         vm_object_t                     object;
12821         vm_object_offset_t              start_offset, end_offset;
12822
12823         /*
12824          * The MADV_REUSE operation doesn't require any changes to the
12825          * vm_map_entry_t's, so the read lock is sufficient.
12826          */
12827
12828         vm_map_lock_read(map);
12829         assert(map->pmap != kernel_pmap);       /* protect alias access */
12830
12831         /*
12832          * The madvise semantics require that the address range be fully
12833          * allocated with no holes.  Otherwise, we're required to return
12834          * an error.
12835          */
12836
12837         if (!vm_map_range_check(map, start, end, &entry)) {
12838                 vm_map_unlock_read(map);
12839                 vm_page_stats_reusable.reuse_pages_failure++;
12840                 return KERN_INVALID_ADDRESS;
12841         }
12842
12843         /*
12844          * Examine each vm_map_entry_t in the range.
12845          */
12846         for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
12847              entry = entry->vme_next) {
12848                 /*
12849                  * Sanity check on the VM map entry.
12850                  */
12851                 if (! vm_map_entry_is_reusable(entry)) {
12852                         vm_map_unlock_read(map);
12853                         vm_page_stats_reusable.reuse_pages_failure++;
12854                         return KERN_INVALID_ADDRESS;
12855                 }
12856
12857                 /*
12858                  * The first time through, the start address could be anywhere
12859                  * within the vm_map_entry we found.  So adjust the offset to
12860                  * correspond.
12861                  */
12862                 if (entry->vme_start < start) {
12863                         start_offset = start - entry->vme_start;
12864                 } else {
12865                         start_offset = 0;
12866                 }
12867                 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
12868                 start_offset += VME_OFFSET(entry);
12869                 end_offset += VME_OFFSET(entry);
12870
12871                 assert(!entry->is_sub_map);
12872                 object = VME_OBJECT(entry);
12873                 if (object != VM_OBJECT_NULL) {
12874                         vm_object_lock(object);
12875                         vm_object_reuse_pages(object, start_offset, end_offset,
12876                                               TRUE);
12877                         vm_object_unlock(object);
12878                 }
12879
12880                 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
12881                         /*
12882                          * XXX
12883                          * We do not hold the VM map exclusively here.
12884                          * The "alias" field is not that critical, so it's
12885                          * safe to update it here, as long as it is the only
12886                          * one that can be modified while holding the VM map
12887                          * "shared".
12888                          */
12889                         VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
12890                 }
12891         }
12892
12893         vm_map_unlock_read(map);
12894         vm_page_stats_reusable.reuse_pages_success++;
12895         return KERN_SUCCESS;
12896 }
12897
12898
12899 static kern_return_t
12900 vm_map_reusable_pages(
12901         vm_map_t        map,
12902         vm_map_offset_t start,
12903         vm_map_offset_t end)
12904 {
12905         vm_map_entry_t                  entry;
12906         vm_object_t                     object;
12907         vm_object_offset_t              start_offset, end_offset;
12908         vm_map_offset_t                 pmap_offset;
12909
12910         /*
12911          * The MADV_REUSABLE operation doesn't require any changes to the
12912          * vm_map_entry_t's, so the read lock is sufficient.
12913          */
12914
12915         vm_map_lock_read(map);
12916         assert(map->pmap != kernel_pmap);       /* protect alias access */
12917
12918         /*
12919          * The madvise semantics require that the address range be fully
12920          * allocated with no holes.  Otherwise, we're required to return
12921          * an error.
12922          */
12923
12924         if (!vm_map_range_check(map, start, end, &entry)) {
12925                 vm_map_unlock_read(map);
12926                 vm_page_stats_reusable.reusable_pages_failure++;
12927                 return KERN_INVALID_ADDRESS;
12928         }
12929
12930         /*
12931          * Examine each vm_map_entry_t in the range.
12932          */
12933         for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
12934              entry = entry->vme_next) {
12935                 int kill_pages = 0;
12936
12937                 /*
12938                  * Sanity check on the VM map entry.
12939                  */
12940                 if (! vm_map_entry_is_reusable(entry)) {
12941                         vm_map_unlock_read(map);
12942                         vm_page_stats_reusable.reusable_pages_failure++;
12943                         return KERN_INVALID_ADDRESS;
12944                 }
12945
12946                 /*
12947                  * The first time through, the start address could be anywhere
12948                  * within the vm_map_entry we found.  So adjust the offset to
12949                  * correspond.
12950                  */
12951                 if (entry->vme_start < start) {
12952                         start_offset = start - entry->vme_start;
12953                         pmap_offset = start;
12954                 } else {
12955                         start_offset = 0;
12956                         pmap_offset = entry->vme_start;
12957                 }
12958                 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
12959                 start_offset += VME_OFFSET(entry);
12960                 end_offset += VME_OFFSET(entry);
12961
12962                 assert(!entry->is_sub_map);
12963                 object = VME_OBJECT(entry);
12964                 if (object == VM_OBJECT_NULL)
12965                         continue;
12966
12967
12968                 vm_object_lock(object);
12969                 if (object->ref_count == 1 &&
12970                     !object->shadow &&
12971                     /*
12972                      * "iokit_acct" entries are billed for their virtual size
12973                      * (rather than for their resident pages only), so they
12974                      * wouldn't benefit from making pages reusable, and it
12975                      * would be hard to keep track of pages that are both
12976                      * "iokit_acct" and "reusable" in the pmap stats and ledgers.
12977                      */
12978                     !(entry->iokit_acct ||
12979                       (!entry->is_sub_map && !entry->use_pmap)))
12980                         kill_pages = 1;
12981                 else
12982                         kill_pages = -1;
12983                 if (kill_pages != -1) {
12984                         vm_object_deactivate_pages(object,
12985                                                    start_offset,
12986                                                    end_offset - start_offset,
12987                                                    kill_pages,
12988                                                    TRUE /*reusable_pages*/,
12989                                                    map->pmap,
12990                                                    pmap_offset);
12991                 } else {
12992                         vm_page_stats_reusable.reusable_pages_shared++;
12993                 }
12994                 vm_object_unlock(object);
12995
12996                 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
12997                     VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
12998                         /*
12999                          * XXX
13000                          * We do not hold the VM map exclusively here.
13001                          * The "alias" field is not that critical, so it's
13002                          * safe to update it here, as long as it is the only
13003                          * one that can be modified while holding the VM map
13004                          * "shared".
13005                          */
13006                         VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
13007                 }
13008         }
13009
13010         vm_map_unlock_read(map);
13011         vm_page_stats_reusable.reusable_pages_success++;
13012         return KERN_SUCCESS;
13013 }
13014
13015
13016 static kern_return_t
13017 vm_map_can_reuse(
13018         vm_map_t        map,
13019         vm_map_offset_t start,
13020         vm_map_offset_t end)
13021 {
13022         vm_map_entry_t                  entry;
13023
13024         /*
13025          * The MADV_REUSABLE operation doesn't require any changes to the
13026          * vm_map_entry_t's, so the read lock is sufficient.
13027          */
13028
13029         vm_map_lock_read(map);
13030         assert(map->pmap != kernel_pmap);       /* protect alias access */
13031
13032         /*
13033          * The madvise semantics require that the address range be fully
13034          * allocated with no holes.  Otherwise, we're required to return
13035          * an error.
13036          */
13037
13038         if (!vm_map_range_check(map, start, end, &entry)) {
13039                 vm_map_unlock_read(map);
13040                 vm_page_stats_reusable.can_reuse_failure++;
13041                 return KERN_INVALID_ADDRESS;
13042         }
13043
13044         /*
13045          * Examine each vm_map_entry_t in the range.
13046          */
13047         for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
13048              entry = entry->vme_next) {
13049                 /*
13050                  * Sanity check on the VM map entry.
13051                  */
13052                 if (! vm_map_entry_is_reusable(entry)) {
13053                         vm_map_unlock_read(map);
13054                         vm_page_stats_reusable.can_reuse_failure++;
13055                         return KERN_INVALID_ADDRESS;
13056                 }
13057         }
13058
13059         vm_map_unlock_read(map);
13060         vm_page_stats_reusable.can_reuse_success++;
13061         return KERN_SUCCESS;
13062 }
13063
13064
#if MACH_ASSERT
/*
 * vm_map_pageout:
 *
 * Calls vm_object_pageout() on the internal VM objects backing the
 * entries of [start, end) in "map".  Only compiled when MACH_ASSERT
 * is set.
 *
 * Entries with no object, or with a non-internal object, are skipped.
 * For a submap entry, the corresponding range of the submap is looked up
 * (under the submap's read lock) and the entry returned by that lookup
 * is treated the same way.
 *
 * Returns KERN_SUCCESS, or KERN_INVALID_ADDRESS if vm_map_range_check()
 * rejects the range in "map" or in one of its submaps.
 */
static kern_return_t
vm_map_pageout(
        vm_map_t        map,
        vm_map_offset_t start,
        vm_map_offset_t end)
{
        vm_map_entry_t                  entry;

        /*
         * The MADV_PAGEOUT operation doesn't require any changes to the
         * vm_map_entry_t's, so the read lock is sufficient.
         */

        vm_map_lock_read(map);

        /*
         * The madvise semantics require that the address range be fully
         * allocated with no holes.  Otherwise, we're required to return
         * an error.
         */

        if (!vm_map_range_check(map, start, end, &entry)) {
                vm_map_unlock_read(map);
                return KERN_INVALID_ADDRESS;
        }

        /*
         * Examine each vm_map_entry_t in the range.
         */
        for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
             entry = entry->vme_next) {
                vm_object_t     object;

                if (entry->is_sub_map) {
                        vm_map_t submap;
                        vm_map_offset_t submap_start;
                        vm_map_offset_t submap_end;
                        vm_map_entry_t submap_entry;

                        /*
                         * Translate the entry into the range it covers
                         * inside the submap.
                         */
                        submap = VME_SUBMAP(entry);
                        submap_start = VME_OFFSET(entry);
                        submap_end = submap_start + (entry->vme_end -
                                                     entry->vme_start);

                        vm_map_lock_read(submap);

                        if (! vm_map_range_check(submap,
                                                 submap_start,
                                                 submap_end,
                                                 &submap_entry)) {
                                vm_map_unlock_read(submap);
                                vm_map_unlock_read(map);
                                return KERN_INVALID_ADDRESS;
                        }

                        /*
                         * NOTE(review): only the entry returned by the
                         * range check is processed; any further submap
                         * entries in [submap_start, submap_end) are not
                         * visited.  Confirm this is intended.
                         */
                        object = VME_OBJECT(submap_entry);
                        if (!submap_entry->is_sub_map &&
                            object != VM_OBJECT_NULL &&
                            object->internal) {
                                vm_object_pageout(object);
                        }

                        vm_map_unlock_read(submap);
                        continue;
                }

                /*
                 * Plain (non-submap) entry: submaps never reach this
                 * point, so only the object itself needs checking.
                 */
                object = VME_OBJECT(entry);
                if (object == VM_OBJECT_NULL ||
                    !object->internal) {
                        continue;
                }

                vm_object_pageout(object);
        }

        vm_map_unlock_read(map);
        return KERN_SUCCESS;
}
#endif /* MACH_ASSERT */
13154
13155
13156 /*
13157  *      Routine:        vm_map_entry_insert
13158  *
13159  *      Descritpion:    This routine inserts a new vm_entry in a locked map.
13160  */
13161 vm_map_entry_t
13162 vm_map_entry_insert(
13163         vm_map_t                map,
13164         vm_map_entry_t          insp_entry,
13165         vm_map_offset_t         start,
13166         vm_map_offset_t         end,
13167         vm_object_t             object,
13168         vm_object_offset_t      offset,
13169         boolean_t               needs_copy,
13170         boolean_t               is_shared,
13171         boolean_t               in_transition,
13172         vm_prot_t               cur_protection,
13173         vm_prot_t               max_protection,
13174         vm_behavior_t           behavior,
13175         vm_inherit_t            inheritance,
13176         unsigned                wired_count,
13177         boolean_t               no_cache,
13178         boolean_t               permanent,
13179         unsigned int            superpage_size,
13180         boolean_t               clear_map_aligned,
13181         boolean_t               is_submap)
13182 {
13183         vm_map_entry_t  new_entry;
13184
13185         assert(insp_entry != (vm_map_entry_t)0);
13186
13187         new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
13188
13189         if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
13190                 new_entry->map_aligned = TRUE;
13191         } else {
13192                 new_entry->map_aligned = FALSE;
13193         }
13194         if (clear_map_aligned &&
13195             (! VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
13196              ! VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
13197                 new_entry->map_aligned = FALSE;
13198         }
13199
13200         new_entry->vme_start = start;
13201         new_entry->vme_end = end;
13202         assert(page_aligned(new_entry->vme_start));
13203         assert(page_aligned(new_entry->vme_end));
13204         if (new_entry->map_aligned) {
13205                 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
13206                                            VM_MAP_PAGE_MASK(map)));
13207                 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
13208                                            VM_MAP_PAGE_MASK(map)));
13209         }
13210         assert(new_entry->vme_start < new_entry->vme_end);
13211
13212         VME_OBJECT_SET(new_entry, object);
13213         VME_OFFSET_SET(new_entry, offset);
13214         new_entry->is_shared = is_shared;
13215         new_entry->is_sub_map = is_submap;
13216         new_entry->needs_copy = needs_copy;
13217         new_entry->in_transition = in_transition;
13218         new_entry->needs_wakeup = FALSE;
13219         new_entry->inheritance = inheritance;
13220         new_entry->protection = cur_protection;
13221         new_entry->max_protection = max_protection;
13222         new_entry->behavior = behavior;
13223         new_entry->wired_count = wired_count;
13224         new_entry->user_wired_count = 0;
13225         if (is_submap) {
13226                 /*
13227                  * submap: "use_pmap" means "nested".
13228                  * default: false.
13229                  */
13230                 new_entry->use_pmap = FALSE;
13231         } else {
13232                 /*
13233                  * object: "use_pmap" means "use pmap accounting" for footprint.
13234                  * default: true.
13235                  */
13236                 new_entry->use_pmap = TRUE;
13237         }
13238         VME_ALIAS_SET(new_entry, 0);
13239         new_entry->zero_wired_pages = FALSE;
13240         new_entry->no_cache = no_cache;
13241         new_entry->permanent = permanent;
13242         if (superpage_size)
13243                 new_entry->superpage_size = TRUE;
13244         else
13245                 new_entry->superpage_size = FALSE;
13246         new_entry->used_for_jit = FALSE;
13247         new_entry->iokit_acct = FALSE;
13248         new_entry->vme_resilient_codesign = FALSE;
13249         new_entry->vme_resilient_media = FALSE;
13250
13251         /*
13252          *      Insert the new entry into the list.
13253          */
13254
13255         vm_map_store_entry_link(map, insp_entry, new_entry);
13256         map->size += end - start;
13257
13258         /*
13259          *      Update the free space hint and the lookup hint.
13260          */
13261
13262         SAVE_HINT_MAP_WRITE(map, new_entry);
13263         return new_entry;
13264 }
13265
13266 /*
13267  *      Routine:        vm_map_remap_extract
13268  *
13269  *      Descritpion:    This routine returns a vm_entry list from a map.
13270  */
13271 static kern_return_t
13272 vm_map_remap_extract(
13273         vm_map_t                map,
13274         vm_map_offset_t         addr,
13275         vm_map_size_t           size,
13276         boolean_t               copy,
13277         struct vm_map_header    *map_header,
13278         vm_prot_t               *cur_protection,
13279         vm_prot_t               *max_protection,
13280         /* What, no behavior? */
13281         vm_inherit_t            inheritance,
13282         boolean_t               pageable)
13283 {
13284         kern_return_t           result;
13285         vm_map_size_t           mapped_size;
13286         vm_map_size_t           tmp_size;
13287         vm_map_entry_t          src_entry;     /* result of last map lookup */
13288         vm_map_entry_t          new_entry;
13289         vm_object_offset_t      offset;
13290         vm_map_offset_t         map_address;
13291         vm_map_offset_t         src_start;     /* start of entry to map */
13292         vm_map_offset_t         src_end;       /* end of region to be mapped */
13293         vm_object_t             object;
13294         vm_map_version_t        version;
13295         boolean_t               src_needs_copy;
13296         boolean_t               new_entry_needs_copy;
13297
13298         assert(map != VM_MAP_NULL);
13299         assert(size != 0);
13300         assert(size == vm_map_round_page(size, PAGE_MASK));
13301         assert(inheritance == VM_INHERIT_NONE ||
13302                inheritance == VM_INHERIT_COPY ||
13303                inheritance == VM_INHERIT_SHARE);
13304
13305         /*
13306          *      Compute start and end of region.
13307          */
13308         src_start = vm_map_trunc_page(addr, PAGE_MASK);
13309         src_end = vm_map_round_page(src_start + size, PAGE_MASK);
13310
13311
13312         /*
13313          *      Initialize map_header.
13314          */
13315         map_header->links.next = (struct vm_map_entry *)&map_header->links;
13316         map_header->links.prev = (struct vm_map_entry *)&map_header->links;
13317         map_header->nentries = 0;
13318         map_header->entries_pageable = pageable;
13319         map_header->page_shift = PAGE_SHIFT;
13320
13321         vm_map_store_init( map_header );
13322
13323         *cur_protection = VM_PROT_ALL;
13324         *max_protection = VM_PROT_ALL;
13325
13326         map_address = 0;
13327         mapped_size = 0;
13328         result = KERN_SUCCESS;
13329
13330         /*
13331          *      The specified source virtual space might correspond to
13332          *      multiple map entries, need to loop on them.
13333          */
13334         vm_map_lock(map);
13335         while (mapped_size != size) {
13336                 vm_map_size_t   entry_size;
13337
13338                 /*
13339                  *      Find the beginning of the region.
13340                  */
13341                 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
13342                         result = KERN_INVALID_ADDRESS;
13343                         break;
13344                 }
13345
13346                 if (src_start < src_entry->vme_start ||
13347                     (mapped_size && src_start != src_entry->vme_start)) {
13348                         result = KERN_INVALID_ADDRESS;
13349                         break;
13350                 }
13351
13352                 tmp_size = size - mapped_size;
13353                 if (src_end > src_entry->vme_end)
13354                         tmp_size -= (src_end - src_entry->vme_end);
13355
13356                 entry_size = (vm_map_size_t)(src_entry->vme_end -
13357                                              src_entry->vme_start);
13358
13359                 if(src_entry->is_sub_map) {
13360                         vm_map_reference(VME_SUBMAP(src_entry));
13361                         object = VM_OBJECT_NULL;
13362                 } else {
13363                         object = VME_OBJECT(src_entry);
13364                         if (src_entry->iokit_acct) {
13365                                 /*
13366                                  * This entry uses "IOKit accounting".
13367                                  */
13368                         } else if (object != VM_OBJECT_NULL &&
13369                                    object->purgable != VM_PURGABLE_DENY) {
13370                                 /*
13371                                  * Purgeable objects have their own accounting:
13372                                  * no pmap accounting for them.
13373                                  */
13374                                 assert(!src_entry->use_pmap);
13375                         } else {
13376                                 /*
13377                                  * Not IOKit or purgeable:
13378                                  * must be accounted by pmap stats.
13379                                  */
13380                                 assert(src_entry->use_pmap);
13381                         }
13382
13383                         if (object == VM_OBJECT_NULL) {
13384                                 object = vm_object_allocate(entry_size);
13385                                 VME_OFFSET_SET(src_entry, 0);
13386                                 VME_OBJECT_SET(src_entry, object);
13387                         } else if (object->copy_strategy !=
13388                                    MEMORY_OBJECT_COPY_SYMMETRIC) {
13389                                 /*
13390                                  *      We are already using an asymmetric
13391                                  *      copy, and therefore we already have
13392                                  *      the right object.
13393                                  */
13394                                 assert(!src_entry->needs_copy);
13395                         } else if (src_entry->needs_copy || object->shadowed ||
13396                                    (object->internal && !object->true_share &&
13397                                     !src_entry->is_shared &&
13398                                     object->vo_size > entry_size)) {
13399
13400                                 VME_OBJECT_SHADOW(src_entry, entry_size);
13401
13402                                 if (!src_entry->needs_copy &&
13403                                     (src_entry->protection & VM_PROT_WRITE)) {
13404                                         vm_prot_t prot;
13405
13406                                         prot = src_entry->protection & ~VM_PROT_WRITE;
13407
13408                                         if (override_nx(map,
13409                                                         VME_ALIAS(src_entry))
13410                                             && prot)
13411                                                 prot |= VM_PROT_EXECUTE;
13412
13413                                         if(map->mapped_in_other_pmaps) {
13414                                                 vm_object_pmap_protect(
13415                                                         VME_OBJECT(src_entry),
13416                                                         VME_OFFSET(src_entry),
13417                                                         entry_size,
13418                                                         PMAP_NULL,
13419                                                         src_entry->vme_start,
13420                                                         prot);
13421                                         } else {
13422                                                 pmap_protect(vm_map_pmap(map),
13423                                                              src_entry->vme_start,
13424                                                              src_entry->vme_end,
13425                                                              prot);
13426                                         }
13427                                 }
13428
13429                                 object = VME_OBJECT(src_entry);
13430                                 src_entry->needs_copy = FALSE;
13431                         }
13432
13433
13434                         vm_object_lock(object);
13435                         vm_object_reference_locked(object); /* object ref. for new entry */
13436                         if (object->copy_strategy ==
13437                             MEMORY_OBJECT_COPY_SYMMETRIC) {
13438                                 object->copy_strategy =
13439                                         MEMORY_OBJECT_COPY_DELAY;
13440                         }
13441                         vm_object_unlock(object);
13442                 }
13443
13444                 offset = (VME_OFFSET(src_entry) +
13445                           (src_start - src_entry->vme_start));
13446
13447                 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
13448                 vm_map_entry_copy(new_entry, src_entry);
13449                 if (new_entry->is_sub_map) {
13450                         /* clr address space specifics */
13451                         new_entry->use_pmap = FALSE;
13452                 }
13453
13454                 new_entry->map_aligned = FALSE;
13455
13456                 new_entry->vme_start = map_address;
13457                 new_entry->vme_end = map_address + tmp_size;
13458                 assert(new_entry->vme_start < new_entry->vme_end);
13459                 new_entry->inheritance = inheritance;
13460                 VME_OFFSET_SET(new_entry, offset);
13461
13462                 /*
13463                  * The new region has to be copied now if required.
13464                  */
13465         RestartCopy:
13466                 if (!copy) {
13467                         /*
13468                          * Cannot allow an entry describing a JIT
13469                          * region to be shared across address spaces.
13470                          */
13471                         if (src_entry->used_for_jit == TRUE) {
13472                                 result = KERN_INVALID_ARGUMENT;
13473                                 break;
13474                         }
13475                         src_entry->is_shared = TRUE;
13476                         new_entry->is_shared = TRUE;
13477                         if (!(new_entry->is_sub_map))
13478                                 new_entry->needs_copy = FALSE;
13479
13480                 } else if (src_entry->is_sub_map) {
13481                         /* make this a COW sub_map if not already */
13482                         assert(new_entry->wired_count == 0);
13483                         new_entry->needs_copy = TRUE;
13484                         object = VM_OBJECT_NULL;
13485                 } else if (src_entry->wired_count == 0 &&
13486                            vm_object_copy_quickly(&VME_OBJECT(new_entry),
13487                                                   VME_OFFSET(new_entry),
13488                                                   (new_entry->vme_end -
13489                                                    new_entry->vme_start),
13490                                                   &src_needs_copy,
13491                                                   &new_entry_needs_copy)) {
13492
13493                         new_entry->needs_copy = new_entry_needs_copy;
13494                         new_entry->is_shared = FALSE;
13495
13496                         /*
13497                          * Handle copy_on_write semantics.
13498                          */
13499                         if (src_needs_copy && !src_entry->needs_copy) {
13500                                 vm_prot_t prot;
13501
13502                                 prot = src_entry->protection & ~VM_PROT_WRITE;
13503
13504                                 if (override_nx(map,
13505                                                 VME_ALIAS(src_entry))
13506                                     && prot)
13507                                         prot |= VM_PROT_EXECUTE;
13508
13509                                 vm_object_pmap_protect(object,
13510                                                        offset,
13511                                                        entry_size,
13512                                                        ((src_entry->is_shared
13513                                                          || map->mapped_in_other_pmaps) ?
13514                                                         PMAP_NULL : map->pmap),
13515                                                        src_entry->vme_start,
13516                                                        prot);
13517
13518                                 assert(src_entry->wired_count == 0);
13519                                 src_entry->needs_copy = TRUE;
13520                         }
13521                         /*
13522                          * Throw away the old object reference of the new entry.
13523                          */
13524                         vm_object_deallocate(object);
13525
13526                 } else {
13527                         new_entry->is_shared = FALSE;
13528
13529                         /*
13530                          * The map can be safely unlocked since we
13531                          * already hold a reference on the object.
13532                          *
13533                          * Record the timestamp of the map for later
13534                          * verification, and unlock the map.
13535                          */
13536                         version.main_timestamp = map->timestamp;
13537                         vm_map_unlock(map);     /* Increments timestamp once! */
13538
13539                         /*
13540                          * Perform the copy.
13541                          */
13542                         if (src_entry->wired_count > 0) {
13543                                 vm_object_lock(object);
13544                                 result = vm_object_copy_slowly(
13545                                         object,
13546                                         offset,
13547                                         entry_size,
13548                                         THREAD_UNINT,
13549                                         &VME_OBJECT(new_entry));
13550
13551                                 VME_OFFSET_SET(new_entry, 0);
13552                                 new_entry->needs_copy = FALSE;
13553                         } else {
13554                                 vm_object_offset_t new_offset;
13555
13556                                 new_offset = VME_OFFSET(new_entry);
13557                                 result = vm_object_copy_strategically(
13558                                         object,
13559                                         offset,
13560                                         entry_size,
13561                                         &VME_OBJECT(new_entry),
13562                                         &new_offset,
13563                                         &new_entry_needs_copy);
13564                                 if (new_offset != VME_OFFSET(new_entry)) {
13565                                         VME_OFFSET_SET(new_entry, new_offset);
13566                                 }
13567
13568                                 new_entry->needs_copy = new_entry_needs_copy;
13569                         }
13570
13571                         /*
13572                          * Throw away the old object reference of the new entry.
13573                          */
13574                         vm_object_deallocate(object);
13575
13576                         if (result != KERN_SUCCESS &&
13577                             result != KERN_MEMORY_RESTART_COPY) {
13578                                 _vm_map_entry_dispose(map_header, new_entry);
13579                                 break;
13580                         }
13581
13582                         /*
13583                          * Verify that the map has not substantially
13584                          * changed while the copy was being made.
13585                          */
13586
13587                         vm_map_lock(map);
13588                         if (version.main_timestamp + 1 != map->timestamp) {
13589                                 /*
13590                                  * Simple version comparison failed.
13591                                  *
13592                                  * Retry the lookup and verify that the
13593                                  * same object/offset are still present.
13594                                  */
13595                                 vm_object_deallocate(VME_OBJECT(new_entry));
13596                                 _vm_map_entry_dispose(map_header, new_entry);
13597                                 if (result == KERN_MEMORY_RESTART_COPY)
13598                                         result = KERN_SUCCESS;
13599                                 continue;
13600                         }
13601
13602                         if (result == KERN_MEMORY_RESTART_COPY) {
13603                                 vm_object_reference(object);
13604                                 goto RestartCopy;
13605                         }
13606                 }
13607
13608                 _vm_map_store_entry_link(map_header,
13609                                    map_header->links.prev, new_entry);
13610
13611                 /*Protections for submap mapping are irrelevant here*/
13612                 if( !src_entry->is_sub_map ) {
13613                         *cur_protection &= src_entry->protection;
13614                         *max_protection &= src_entry->max_protection;
13615                 }
13616                 map_address += tmp_size;
13617                 mapped_size += tmp_size;
13618                 src_start += tmp_size;
13619
13620         } /* end while */
13621
13622         vm_map_unlock(map);
13623         if (result != KERN_SUCCESS) {
13624                 /*
13625                  * Free all allocated elements.
13626                  */
13627                 for (src_entry = map_header->links.next;
13628                      src_entry != (struct vm_map_entry *)&map_header->links;
13629                      src_entry = new_entry) {
13630                         new_entry = src_entry->vme_next;
13631                         _vm_map_store_entry_unlink(map_header, src_entry);
13632                         if (src_entry->is_sub_map) {
13633                                 vm_map_deallocate(VME_SUBMAP(src_entry));
13634                         } else {
13635                                 vm_object_deallocate(VME_OBJECT(src_entry));
13636                         }
13637                         _vm_map_entry_dispose(map_header, src_entry);
13638                 }
13639         }
13640         return result;
13641 }
13642
13643 /*
13644  *      Routine:        vm_remap
13645  *
13646  *                      Map portion of a task's address space.
13647  *                      Mapped region must not overlap more than
13648  *                      one vm memory object. Protections and
13649  *                      inheritance attributes remain the same
13650  *                      as in the original task and are out parameters.
13651  *                      Source and Target task can be identical
13652  *                      Other attributes are identical as for vm_map()
13653  */
13654 kern_return_t
13655 vm_map_remap(
13656         vm_map_t                target_map,
13657         vm_map_address_t        *address,
13658         vm_map_size_t           size,
13659         vm_map_offset_t         mask,
13660         int                     flags,
13661         vm_map_t                src_map,
13662         vm_map_offset_t         memory_address,
13663         boolean_t               copy,
13664         vm_prot_t               *cur_protection,
13665         vm_prot_t               *max_protection,
13666         vm_inherit_t            inheritance)
13667 {
13668         kern_return_t           result;
13669         vm_map_entry_t          entry;
13670         vm_map_entry_t          insp_entry = VM_MAP_ENTRY_NULL;
13671         vm_map_entry_t          new_entry;
13672         struct vm_map_header    map_header;
13673         vm_map_offset_t         offset_in_mapping;
13674
13675         if (target_map == VM_MAP_NULL)
13676                 return KERN_INVALID_ARGUMENT;
13677
13678         switch (inheritance) {
13679         case VM_INHERIT_NONE:
13680         case VM_INHERIT_COPY:
13681         case VM_INHERIT_SHARE:
13682                 if (size != 0 && src_map != VM_MAP_NULL)
13683                         break;
13684                 /*FALL THRU*/
13685         default:
13686                 return KERN_INVALID_ARGUMENT;
13687         }
13688
13689         /*
13690          * If the user is requesting that we return the address of the
13691          * first byte of the data (rather than the base of the page),
13692          * then we use different rounding semantics: specifically,
13693          * we assume that (memory_address, size) describes a region
13694          * all of whose pages we must cover, rather than a base to be truncated
13695          * down and a size to be added to that base.  So we figure out
13696          * the highest page that the requested region includes and make
13697          * sure that the size will cover it.
13698          *
13699          * The key example we're worried about it is of the form:
13700          *
13701          *              memory_address = 0x1ff0, size = 0x20
13702          *
13703          * With the old semantics, we round down the memory_address to 0x1000
13704          * and round up the size to 0x1000, resulting in our covering *only*
13705          * page 0x1000.  With the new semantics, we'd realize that the region covers
13706          * 0x1ff0-0x2010, and compute a size of 0x2000.  Thus, we cover both page
13707          * 0x1000 and page 0x2000 in the region we remap.
13708          */
13709         if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
13710                 offset_in_mapping = memory_address - vm_map_trunc_page(memory_address, PAGE_MASK);
13711                 size = vm_map_round_page(memory_address + size - vm_map_trunc_page(memory_address, PAGE_MASK), PAGE_MASK);
13712         } else {
13713                 size = vm_map_round_page(size, PAGE_MASK);
13714         }
13715
13716         result = vm_map_remap_extract(src_map, memory_address,
13717                                       size, copy, &map_header,
13718                                       cur_protection,
13719                                       max_protection,
13720                                       inheritance,
13721                                       target_map->hdr.entries_pageable);
13722
13723         if (result != KERN_SUCCESS) {
13724                 return result;
13725         }
13726
13727         /*
13728          * Allocate/check a range of free virtual address
13729          * space for the target
13730          */
13731         *address = vm_map_trunc_page(*address,
13732                                      VM_MAP_PAGE_MASK(target_map));
13733         vm_map_lock(target_map);
13734         result = vm_map_remap_range_allocate(target_map, address, size,
13735                                              mask, flags, &insp_entry);
13736
13737         for (entry = map_header.links.next;
13738              entry != (struct vm_map_entry *)&map_header.links;
13739              entry = new_entry) {
13740                 new_entry = entry->vme_next;
13741                 _vm_map_store_entry_unlink(&map_header, entry);
13742                 if (result == KERN_SUCCESS) {
13743                         if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
13744                                 /* no codesigning -> read-only access */
13745                                 assert(!entry->used_for_jit);
13746                                 entry->max_protection = VM_PROT_READ;
13747                                 entry->protection = VM_PROT_READ;
13748                                 entry->vme_resilient_codesign = TRUE;
13749                         }
13750                         entry->vme_start += *address;
13751                         entry->vme_end += *address;
13752                         assert(!entry->map_aligned);
13753                         vm_map_store_entry_link(target_map, insp_entry, entry);
13754                         insp_entry = entry;
13755                 } else {
13756                         if (!entry->is_sub_map) {
13757                                 vm_object_deallocate(VME_OBJECT(entry));
13758                         } else {
13759                                 vm_map_deallocate(VME_SUBMAP(entry));
13760                         }
13761                         _vm_map_entry_dispose(&map_header, entry);
13762                 }
13763         }
13764
13765         if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
13766                 *cur_protection = VM_PROT_READ;
13767                 *max_protection = VM_PROT_READ;
13768         }
13769
13770         if( target_map->disable_vmentry_reuse == TRUE) {
13771                 if( target_map->highest_entry_end < insp_entry->vme_end ){
13772                         target_map->highest_entry_end = insp_entry->vme_end;
13773                 }
13774         }
13775
13776         if (result == KERN_SUCCESS) {
13777                 target_map->size += size;
13778                 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
13779         }
13780         vm_map_unlock(target_map);
13781
13782         if (result == KERN_SUCCESS && target_map->wiring_required)
13783                 result = vm_map_wire(target_map, *address,
13784                                      *address + size, *cur_protection | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_MLOCK),
13785                                      TRUE);
13786
13787         /*
13788          * If requested, return the address of the data pointed to by the
13789          * request, rather than the base of the resulting page.
13790          */
13791         if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
13792                 *address += offset_in_mapping;
13793         }
13794
13795         return result;
13796 }
13797
13798 /*
13799  *      Routine:        vm_map_remap_range_allocate
13800  *
13801  *      Description:
13802  *              Find (VM_FLAGS_ANYWHERE) or validate (fixed address) a free
13803  *              range of "size" bytes in "map".  On success, "*map_entry" is
13804  *              the map entry just before the range; for VM_FLAGS_ANYWHERE
13805  *              "*address" is updated to the start of the range found.
13806  *      Map must be locked.
13807  */
13808
13809 static kern_return_t
13810 vm_map_remap_range_allocate(
13811         vm_map_t                map,            /* map to search; locked by caller */
13812         vm_map_address_t        *address,       /* IN/OUT: hint or fixed address */
13813         vm_map_size_t           size,           /* length of the range wanted */
13814         vm_map_offset_t         mask,           /* alignment: bits that must be 0 in the start */
13815         int                     flags,          /* VM_FLAGS_ANYWHERE, VM_FLAGS_OVERWRITE */
13816         vm_map_entry_t          *map_entry)     /* OUT: entry preceding the range */
13817 {
13818         vm_map_entry_t  entry;
13819         vm_map_offset_t start;
13820         vm_map_offset_t end;
13821         kern_return_t   kr;
13822         vm_map_entry_t          hole_entry;
13823
13824 StartAgain: ;   /* restart point after sleeping for space */
13825
13826         start = *address;       /* (re)read the caller's hint / fixed address */
13827
13828         if (flags & VM_FLAGS_ANYWHERE)
13829         {
13830                 /*
13831                  *      Calculate the first possible address.
13832                  */
13833
13834                 if (start < map->min_offset)
13835                         start = map->min_offset;
13836                 if (start > map->max_offset)
13837                         return(KERN_NO_SPACE);
13838
13839                 /*
13840                  *      Look for the first possible address;
13841                  *      if there's already something at this
13842                  *      address, we have to start after it.
13843                  */
13844
13845                 if( map->disable_vmentry_reuse == TRUE) {
13846                         VM_MAP_HIGHEST_ENTRY(map, entry, start);
13847                 } else {
13848
13849                         if (map->holelistenabled) {
13850                                 hole_entry = (vm_map_entry_t)map->holes_list;
13851
13852                                 if (hole_entry == NULL) {
13853                                         /*
13854                                          * No more space in the map?
13855                                          */
13856                                         return(KERN_NO_SPACE);
13857                                 } else {
13858
13859                                         boolean_t found_hole = FALSE;
13860
13861                                         do {
13862                                                 if (hole_entry->vme_start >= start) {   /* hole begins at or above start */
13863                                                         start = hole_entry->vme_start;
13864                                                         found_hole = TRUE;
13865                                                         break;
13866                                                 }
13867
13868                                                 if (hole_entry->vme_end > start) {      /* start lies inside this hole */
13869                                                         found_hole = TRUE;
13870                                                         break;
13871                                                 }
13872                                                 hole_entry = hole_entry->vme_next;
13873
13874                                         } while (hole_entry != (vm_map_entry_t) map->holes_list);
13875
13876                                         if (found_hole == FALSE) {
13877                                                 return (KERN_NO_SPACE);
13878                                         }
13879
13880                                         entry = hole_entry;     /* "entry" is a hole here, not a map entry */
13881                                 }
13882                         } else {
13883                                 assert(first_free_is_valid(map));
13884                                 if (start == map->min_offset) {
13885                                         if ((entry = map->first_free) != vm_map_to_entry(map))
13886                                                 start = entry->vme_end;
13887                                 } else {
13888                                         vm_map_entry_t  tmp_entry;
13889                                         if (vm_map_lookup_entry(map, start, &tmp_entry))
13890                                                 start = tmp_entry->vme_end;
13891                                         entry = tmp_entry;
13892                                 }
13893                         }
13894                         start = vm_map_round_page(start,
13895                                                   VM_MAP_PAGE_MASK(map));
13896                 }
13897
13898                 /*
13899                  *      In any case, the "entry" always precedes
13900                  *      the proposed new region throughout the
13901                  *      loop:
13902                  */
13903
13904                 while (TRUE) {
13905                         register vm_map_entry_t next;
13906
13907                         /*
13908                          *      Find the end of the proposed new region.
13909                          *      Be sure we didn't go beyond the end, or
13910                          *      wrap around the address.
13911                          */
13912
13913                         end = ((start + mask) & ~mask); /* align start up per "mask" */
13914                         end = vm_map_round_page(end,
13915                                                 VM_MAP_PAGE_MASK(map));
13916                         if (end < start)
13917                                 return(KERN_NO_SPACE);
13918                         start = end;
13919                         end += size;
13920
13921                         if ((end > map->max_offset) || (end < start)) {
13922                                 if (map->wait_for_space) {
13923                                         if (size <= (map->max_offset -
13924                                                      map->min_offset)) {
13925                                                 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
13926                                                 vm_map_unlock(map);     /* map is unlocked while we sleep */
13927                                                 thread_block(THREAD_CONTINUE_NULL);
13928                                                 vm_map_lock(map);
13929                                                 goto StartAgain;        /* map may have changed: rescan */
13930                                         }
13931                                 }
13932
13933                                 return(KERN_NO_SPACE);
13934                         }
13935
13936                         next = entry->vme_next;
13937
13938                         if (map->holelistenabled) {
13939                                 if (entry->vme_end >= end)      /* hole-list mode: current entry reaches "end" */
13940                                         break;
13941                         } else {
13942                                 /*
13943                                  *      If there are no more entries, we must win.
13944                                  *
13945                                  *      OR
13946                                  *
13947                                  *      If there is another entry, it must be
13948                                  *      after the end of the potential new region.
13949                                  */
13950
13951                                 if (next == vm_map_to_entry(map))
13952                                         break;
13953
13954                                 if (next->vme_start >= end)
13955                                         break;
13956                         }
13957
13958                         /*
13959                          *      Didn't fit -- move to the next entry.
13960                          */
13961
13962                         entry = next;
13963
13964                         if (map->holelistenabled) {
13965                                 if (entry == (vm_map_entry_t) map->holes_list) {
13966                                         /*
13967                                          * Wrapped around
13968                                          */
13969                                         return(KERN_NO_SPACE);
13970                                 }
13971                                 start = entry->vme_start;       /* retry from the start of the next hole */
13972                         } else {
13973                                 start = entry->vme_end;         /* retry right after the next entry */
13974                         }
13975                 }
13976
13977                 if (map->holelistenabled) {
13978                         /* swap the hole for a real map entry (presumably the one before it -- confirm); a hit means a stale hole */
13979                         if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
13980                                 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
13981                         }
13982                 }
13983
13984                 *address = start;
13985
13986         } else {
13987                 vm_map_entry_t          temp_entry;
13988
13989                 /*
13990                  *      Verify that:
13991                  *              the address doesn't itself violate
13992                  *              the mask requirement.
13993                  */
13994
13995                 if ((start & mask) != 0)
13996                         return(KERN_NO_SPACE);
13997
13998
13999                 /*
14000                  *      ...     the address is within bounds
14001                  */
14002
14003                 end = start + size;
14004
14005                 if ((start < map->min_offset) ||
14006                     (end > map->max_offset) ||
14007                     (start >= end)) {
14008                         return(KERN_INVALID_ADDRESS);
14009                 }
14010
14011                 /*
14012                  * If we're asked to overwrite whatever was mapped in that
14013                  * range, first deallocate that range.
14014                  */
14015                 if (flags & VM_FLAGS_OVERWRITE) {
14016                         vm_map_t zap_map;
14017
14018                         /*
14019                          * We use a "zap_map" to avoid having to unlock
14020                          * the "map" in vm_map_delete(), which would compromise
14021                          * the atomicity of the "deallocate" and then "remap"
14022                          * combination.
14023                          */
14024                         zap_map = vm_map_create(PMAP_NULL,
14025                                                 start,
14026                                                 end,
14027                                                 map->hdr.entries_pageable);
14028                         if (zap_map == VM_MAP_NULL) {
14029                                 return KERN_RESOURCE_SHORTAGE;
14030                         }
14031                         vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
14032                         vm_map_disable_hole_optimization(zap_map);
14033
14034                         kr = vm_map_delete(map, start, end,
14035                                            (VM_MAP_REMOVE_SAVE_ENTRIES |
14036                                             VM_MAP_REMOVE_NO_MAP_ALIGN),
14037                                            zap_map);
14038                         if (kr == KERN_SUCCESS) {       /* NOTE(review): zap_map is not destroyed if vm_map_delete() fails */
14039                                 vm_map_destroy(zap_map,
14040                                                VM_MAP_REMOVE_NO_PMAP_CLEANUP);
14041                                 zap_map = VM_MAP_NULL;
14042                         }
14043                 }
14044
14045                 /*
14046                  *      ...     the starting address isn't allocated
14047                  */
14048
14049                 if (vm_map_lookup_entry(map, start, &temp_entry))
14050                         return(KERN_NO_SPACE);
14051
14052                 entry = temp_entry;
14053
14054                 /*
14055                  *      ...     the next region doesn't overlap the
14056                  *              end point.
14057                  */
14058
14059                 if ((entry->vme_next != vm_map_to_entry(map)) &&
14060                     (entry->vme_next->vme_start < end))
14061                         return(KERN_NO_SPACE);
14062         }
14063         *map_entry = entry;
14064         return(KERN_SUCCESS);
14065 }
14066
14067 /*
14068  *      vm_map_switch:
14069  *
14070  *      Make "map" the address map of the current thread and return
14071  *      the map the thread was using before the call.
14072  */
14073 vm_map_t
14074 vm_map_switch(
14075         vm_map_t        map)
14076 {
14077         thread_t        cur_thread;
14078         vm_map_t        prev_map;
14079         int             cpu_id;
14080
14081         cur_thread = current_thread();
14082         prev_map = cur_thread->map;
14083
14084         /* the pmap switch below runs with preemption disabled */
14085         mp_disable_preemption();
14086         cpu_id = cpu_number();
14087         PMAP_SWITCH_USER(cur_thread, map, cpu_id);
14088         mp_enable_preemption();
14089
14090         return prev_map;
14091 }
14092
14093
14094 /*
14095  *      Routine:        vm_map_write_user
14096  *
14097  *      Description:
14098  *              Copy "size" bytes from the kernel buffer "src_p" out to
14099  *              "dst_addr" in the destination map.  The space must already
14100  *              exist in the destination map.  Returns KERN_SUCCESS, or
14101  *              KERN_INVALID_ADDRESS if copyout() reports a failure.
14102  *
14103  *              NOTE:  Only call this from threads that may block on a
14104  *              page fault, i.e. kernel mode user threads.
14105  */
14106 kern_return_t
14107 vm_map_write_user(
14108         vm_map_t                map,
14109         void                    *src_p,
14110         vm_map_address_t        dst_addr,
14111         vm_size_t               size)
14112 {
14113         vm_map_t        saved_map;
14114         int             copy_failed;
14115
14116         /* fast path: the caller is already running on the target map */
14117         if (current_map() == map) {
14118                 copy_failed = copyout(src_p, dst_addr, size);
14119                 return copy_failed ? KERN_INVALID_ADDRESS : KERN_SUCCESS;
14120         }
14121
14122         /*
14123          * Otherwise take on the identity of the target map for the
14124          * duration of the copy, holding an extra reference meanwhile.
14125          */
14126         vm_map_reference(map);
14127         saved_map = vm_map_switch(map);
14128
14129         copy_failed = copyout(src_p, dst_addr, size);
14130
14131         vm_map_switch(saved_map);
14132         vm_map_deallocate(map);
14133         return copy_failed ? KERN_INVALID_ADDRESS : KERN_SUCCESS;
14134 }
14135
14136 /*
14137  *      Routine:        vm_map_read_user
14138  *
14139  *      Description:
14140  *              Copy "size" bytes from "src_addr" in the user space source
14141  *              map into the kernel buffer "dst_p".  The space must already
14142  *              exist in the kernel map.  Returns KERN_SUCCESS, or
14143  *              KERN_INVALID_ADDRESS if copyin() reports a failure.
14144  *
14145  *              NOTE:  Only call this from threads that may block on a
14146  *              page fault, i.e. kernel mode user threads.
14147  */
14148 kern_return_t
14149 vm_map_read_user(
14150         vm_map_t                map,
14151         vm_map_address_t        src_addr,
14152         void                    *dst_p,
14153         vm_size_t               size)
14154 {
14155         vm_map_t        saved_map;
14156         int             copy_failed;
14157
14158         /* fast path: the caller is already running on the source map */
14159         if (current_map() == map) {
14160                 copy_failed = copyin(src_addr, dst_p, size);
14161                 return copy_failed ? KERN_INVALID_ADDRESS : KERN_SUCCESS;
14162         }
14163
14164         /*
14165          * Otherwise take on the identity of the source map for the
14166          * duration of the copy, holding an extra reference meanwhile.
14167          */
14168         vm_map_reference(map);
14169         saved_map = vm_map_switch(map);
14170
14171         copy_failed = copyin(src_addr, dst_p, size);
14172
14173         vm_map_switch(saved_map);
14174         vm_map_deallocate(map);
14175         return copy_failed ? KERN_INVALID_ADDRESS : KERN_SUCCESS;
14176 }
14177
14178
14179 /*
14180  *      vm_map_check_protection:
14181  *
14182  *      Return TRUE only if the whole region [start, end) of "map" is
14183  *      allocated and every entry covering it grants all of the access
14184  *      bits in "protection".  "start" itself must be mapped.
14185  */
14186 boolean_t
14187 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
14188                         vm_map_offset_t end, vm_prot_t protection)
14189 {
14190         vm_map_entry_t  cur;
14191         boolean_t       allowed = FALSE;
14192
14193         vm_map_lock(map);
14194
14195         /*
14196          * The range has to be well formed and lie within the map.
14197          */
14198         if (start < vm_map_min(map) ||
14199             end > vm_map_max(map) ||
14200             start > end) {
14201                 goto out;
14202         }
14203
14204         /*
14205          * The first address has to be mapped, even for an empty range.
14206          */
14207         if (!vm_map_lookup_entry(map, start, &cur)) {
14208                 goto out;
14209         }
14210
14211         /*
14212          * Step through successive entries, advancing "start" to the
14213          * end of each one, until the whole range has been covered.
14214          */
14215         for (; start < end; start = cur->vme_end, cur = cur->vme_next) {
14216
14217                 /* ran off the end of the entry list */
14218                 if (cur == vm_map_to_entry(map)) {
14219                         goto out;
14220                 }
14221
14222                 /* no holes allowed */
14223                 if (start < cur->vme_start) {
14224                         goto out;
14225                 }
14226
14227                 /* the entry must grant every requested access bit */
14228                 if ((cur->protection & protection) != protection) {
14229                         goto out;
14230                 }
14231         }
14232
14233         allowed = TRUE;
14234
14235 out:
14236         vm_map_unlock(map);
14237
14238         return allowed;
14239 }
14240
14241 /*
14242  *      vm_map_purgable_control:
14243  *
14244  *      Apply the purgeable "control" operation to the VM object mapped
14245  *      at "address" in "map".  VM_PURGABLE_PURGE_ALL ignores "address"
14246  *      and calls vm_purgeable_object_purge_all(); SET/GET requests are
14247  *      forwarded to vm_object_purgable_control() along with "state".
14248  */
14249 kern_return_t
14250 vm_map_purgable_control(
14251         vm_map_t                map,
14252         vm_map_offset_t         address,
14253         vm_purgable_t           control,
14254         int                     *state)
14255 {
14256         vm_map_entry_t          entry;
14257         vm_object_t             object;
14258         kern_return_t           kr;
14259         boolean_t               started_nonvolatile;
14260
14261         /*
14262          * Vet all the input parameters and the current type and state
14263          * of the underlying object.  Bail out if anything is amiss.
14264          */
14265         if (map == VM_MAP_NULL) {
14266                 return KERN_INVALID_ARGUMENT;
14267         }
14268
14269         if (control == VM_PURGABLE_PURGE_ALL) {
14270                 vm_purgeable_object_purge_all();
14271                 return KERN_SUCCESS;
14272         }
14273
14274         if (control == VM_PURGABLE_SET_STATE) {
14275                 if ((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0 ||
14276                     (*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK) {
14277                         return KERN_INVALID_ARGUMENT;
14278                 }
14279         } else if (control != VM_PURGABLE_GET_STATE) {
14280                 return KERN_INVALID_ARGUMENT;
14281         }
14282
14283         vm_map_lock_read(map);
14284
14285         /* must pass a valid non-submap address */
14286         if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
14287                 kr = KERN_INVALID_ADDRESS;
14288                 goto unlock_map;
14289         }
14290
14291         /* can't apply purgable controls to something you can't write */
14292         if (!(entry->protection & VM_PROT_WRITE)) {
14293                 kr = KERN_PROTECTION_FAILURE;
14294                 goto unlock_map;
14295         }
14296
14297         /* the object must already be present and be purgeable */
14298         object = VME_OBJECT(entry);
14299         if (object == VM_OBJECT_NULL ||
14300             object->purgable == VM_PURGABLE_DENY) {
14301                 kr = KERN_INVALID_ARGUMENT;
14302                 goto unlock_map;
14303         }
14304
14305         vm_object_lock(object);
14306
14307 #if 00
14308         /* disabled: only allow controls on the whole (existing) object */
14309         if (VME_OFFSET(entry) != 0 ||
14310             entry->vme_end - entry->vme_start != object->vo_size) {
14311                 vm_object_unlock(object);
14312                 kr = KERN_INVALID_ARGUMENT;
14313                 goto unlock_map;
14314         }
14315 #endif
14316
14317         assert(!entry->is_sub_map);
14318         assert(!entry->use_pmap); /* purgeable has its own accounting */
14319         /* the object lock is held from here on; the map lock can go */
14320         vm_map_unlock_read(map);
14321
14322         started_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);
14323         kr = vm_object_purgable_control(object, control, state);
14324
14325         if (started_nonvolatile &&
14326             object->purgable != VM_PURGABLE_NONVOLATILE &&
14327             map->pmap == kernel_pmap) {
14328 #if DEBUG
14329                 object->vo_purgeable_volatilizer = kernel_task;
14330 #endif /* DEBUG */
14331         }
14332
14333         vm_object_unlock(object);
14334         return kr;
14335
14336 unlock_map:
14337         /* common exit for failures detected under the map read lock */
14338         vm_map_unlock_read(map);
14339         return kr; }
14340
14341 /*
14342  *      vm_map_page_query_internal:
14343  *
14344  *      Fetch the VM_PAGE_INFO_BASIC record for "offset" in "target_map"
14345  *      and return its disposition and ref_count fields.  Both outputs
14346  *      are zeroed when vm_map_page_info() fails.
14347  */
14348 kern_return_t
14349 vm_map_page_query_internal(
14350         vm_map_t        target_map,
14351         vm_map_offset_t offset,
14352         int             *disposition,
14353         int             *ref_count)
14354 {
14355         vm_page_info_basic_data_t       basic;
14356         mach_msg_type_number_t          basic_count = VM_PAGE_INFO_BASIC_COUNT;
14357         kern_return_t                   status;
14358         boolean_t                       ok;
14359
14360         status = vm_map_page_info(target_map, offset, VM_PAGE_INFO_BASIC,
14361                                   (vm_page_info_t) &basic, &basic_count);
14362         ok = (status == KERN_SUCCESS);
14363
14364         *disposition = ok ? basic.disposition : 0;
14365         *ref_count = ok ? basic.ref_count : 0;
14366         return status;
14367 }
14368
14369 kern_return_t
14370 vm_map_page_info(
14371         vm_map_t                map,
14372         vm_map_offset_t         offset,
14373         vm_page_info_flavor_t   flavor,
14374         vm_page_info_t          info,
14375         mach_msg_type_number_t  *count)
14376 {
14377         vm_map_entry_t          map_entry;
14378         vm_object_t             object;
14379         vm_page_t               m;
14380         kern_return_t           kr;
14381         kern_return_t           retval = KERN_SUCCESS;
14382         boolean_t               top_object;
14383         int                     disposition;
14384         int                     ref_count;
14385         vm_page_info_basic_t    basic_info;
14386         int                     depth;
14387         vm_map_offset_t         offset_in_page;
14388
14389         switch (flavor) {
14390         case VM_PAGE_INFO_BASIC:
14391                 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
14392                         /*
14393                          * The "vm_page_info_basic_data" structure was not
14394                          * properly padded, so allow the size to be off by
14395                          * one to maintain backwards binary compatibility...
14396                          */
14397                         if (*count != VM_PAGE_INFO_BASIC_COUNT - 1)
14398                                 return KERN_INVALID_ARGUMENT;
14399                 }
14400                 break;
14401         default:
14402                 return KERN_INVALID_ARGUMENT;
14403         }
14404
14405         disposition = 0;
14406         ref_count = 0;
14407         top_object = TRUE;
14408         depth = 0;
14409
14410         retval = KERN_SUCCESS;
14411         offset_in_page = offset & PAGE_MASK;
14412         offset = vm_map_trunc_page(offset, PAGE_MASK);
14413
14414         vm_map_lock_read(map);
14415
14416         /*
14417          * First, find the map entry covering "offset", going down
14418          * submaps if necessary.
14419          */
14420         for (;;) {
14421                 if (!vm_map_lookup_entry(map, offset, &map_entry)) {
14422                         vm_map_unlock_read(map);
14423                         return KERN_INVALID_ADDRESS;
14424                 }
14425                 /* compute offset from this map entry's start */
14426                 offset -= map_entry->vme_start;
14427                 /* compute offset into this map entry's object (or submap) */
14428                 offset += VME_OFFSET(map_entry);
14429
14430                 if (map_entry->is_sub_map) {
14431                         vm_map_t sub_map;
14432
14433                         sub_map = VME_SUBMAP(map_entry);
14434                         vm_map_lock_read(sub_map);
14435                         vm_map_unlock_read(map);
14436
14437                         map = sub_map;
14438
14439                         ref_count = MAX(ref_count, map->ref_count);
14440                         continue;
14441                 }
14442                 break;
14443         }
14444
14445         object = VME_OBJECT(map_entry);
14446         if (object == VM_OBJECT_NULL) {
14447                 /* no object -> no page */
14448                 vm_map_unlock_read(map);
14449                 goto done;
14450         }
14451
14452         vm_object_lock(object);
14453         vm_map_unlock_read(map);
14454
14455         /*
14456          * Go down the VM object shadow chain until we find the page
14457          * we're looking for.
14458          */
14459         for (;;) {
14460                 ref_count = MAX(ref_count, object->ref_count);
14461
14462                 m = vm_page_lookup(object, offset);
14463
14464                 if (m != VM_PAGE_NULL) {
14465                         disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
14466                         break;
14467                 } else {
14468 #if MACH_PAGEMAP
14469                         if (object->existence_map) {
14470                                 if (vm_external_state_get(object->existence_map,
14471                                                           offset) ==
14472                                     VM_EXTERNAL_STATE_EXISTS) {
14473                                         /*
14474                                          * this page has been paged out
14475                                          */
14476                                         disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
14477                                         break;
14478                                 }
14479                         } else
14480 #endif
14481                         if (object->internal &&
14482                             object->alive &&
14483                             !object->terminating &&
14484                             object->pager_ready) {
14485
14486                                 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
14487                                         if (VM_COMPRESSOR_PAGER_STATE_GET(
14488                                                     object,
14489                                                     offset)
14490                                             == VM_EXTERNAL_STATE_EXISTS) {
14491                                                 /* the pager has that page */
14492                                                 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
14493                                                 break;
14494                                         }
14495                                 } else {
14496                                         memory_object_t pager;
14497
14498                                         vm_object_paging_begin(object);
14499                                         pager = object->pager;
14500                                         vm_object_unlock(object);
14501
14502                                         /*
14503                                          * Ask the default pager if
14504                                          * it has this page.
14505                                          */
14506                                         kr = memory_object_data_request(
14507                                                 pager,
14508                                                 offset + object->paging_offset,
14509                                                 0, /* just poke the pager */
14510                                                 VM_PROT_READ,
14511                                                 NULL);
14512
14513                                         vm_object_lock(object);
14514                                         vm_object_paging_end(object);
14515
14516                                         if (kr == KERN_SUCCESS) {
14517                                                 /* the default pager has it */
14518                                                 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
14519                                                 break;
14520                                         }
14521                                 }
14522                         }
14523
14524                         if (object->shadow != VM_OBJECT_NULL) {
14525                                 vm_object_t shadow;
14526
14527                                 offset += object->vo_shadow_offset;
14528                                 shadow = object->shadow;
14529
14530                                 vm_object_lock(shadow);
14531                                 vm_object_unlock(object);
14532
14533                                 object = shadow;
14534                                 top_object = FALSE;
14535                                 depth++;
14536                         } else {
14537 //                              if (!object->internal)
14538 //                                      break;
14539 //                              retval = KERN_FAILURE;
14540 //                              goto done_with_object;
14541                                 break;
14542                         }
14543                 }
14544         }
14545         /* The ref_count is not strictly accurate, it measures the number   */
14546         /* of entities holding a ref on the object, they may not be mapping */
14547         /* the object or may not be mapping the section holding the         */
14548         /* target page but its still a ball park number and though an over- */
14549         /* count, it picks up the copy-on-write cases                       */
14550
14551         /* We could also get a picture of page sharing from pmap_attributes */
14552         /* but this would under count as only faulted-in mappings would     */
14553         /* show up.                                                         */
14554
14555         if (top_object == TRUE && object->shadow)
14556                 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
14557
14558         if (! object->internal)
14559                 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
14560
14561         if (m == VM_PAGE_NULL)
14562                 goto done_with_object;
14563
14564         if (m->fictitious) {
14565                 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
14566                 goto done_with_object;
14567         }
14568         if (m->dirty || pmap_is_modified(m->phys_page))
14569                 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
14570
14571         if (m->reference || pmap_is_referenced(m->phys_page))
14572                 disposition |= VM_PAGE_QUERY_PAGE_REF;
14573
14574         if (m->speculative)
14575                 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
14576
14577         if (m->cs_validated)
14578                 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
14579         if (m->cs_tainted)
14580                 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
14581         if (m->cs_nx)
14582                 disposition |= VM_PAGE_QUERY_PAGE_CS_NX;
14583
14584 done_with_object:
14585         vm_object_unlock(object);
14586 done:
14587
14588         switch (flavor) {
14589         case VM_PAGE_INFO_BASIC:
14590                 basic_info = (vm_page_info_basic_t) info;
14591                 basic_info->disposition = disposition;
14592                 basic_info->ref_count = ref_count;
14593                 basic_info->object_id = (vm_object_id_t) (uintptr_t)
14594                         VM_KERNEL_ADDRPERM(object);
14595                 basic_info->offset =
14596                         (memory_object_offset_t) offset + offset_in_page;
14597                 basic_info->depth = depth;
14598                 break;
14599         }
14600
14601         return retval;
14602 }
14603
14604 /*
14605  *      vm_map_msync
14606  *
14607  *      Synchronises the memory range specified with its backing store
14608  *      image by either flushing or cleaning the contents to the appropriate
14609  *      memory manager engaging in a memory object synchronize dialog with
14610  *      the manager.  The client doesn't return until the manager issues
14611  *      m_o_s_completed message.  MIG Magically converts user task parameter
14612  *      to the task's address map.
14613  *
14614  *      interpretation of sync_flags
14615  *      VM_SYNC_INVALIDATE      - discard pages, only return precious
14616  *                                pages to manager.
14617  *
14618  *      VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
14619  *                              - discard pages, write dirty or precious
14620  *                                pages back to memory manager.
14621  *
14622  *      VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
14623  *                              - write dirty or precious pages back to
14624  *                                the memory manager.
14625  *
14626  *      VM_SYNC_CONTIGUOUS      - does everything normally, but if there
14627  *                                is a hole in the region, and we would
14628  *                                have returned KERN_SUCCESS, return
14629  *                                KERN_INVALID_ADDRESS instead.
14630  *
14631  *      NOTE
14632  *      The memory object attributes have not yet been implemented, this
14633  *      function will have to deal with the invalidate attribute
14634  *
14635  *      RETURNS
14636  *      KERN_INVALID_TASK               Bad task parameter
14637  *      KERN_INVALID_ARGUMENT           both sync and async were specified.
14638  *      KERN_SUCCESS                    The usual.
14639  *      KERN_INVALID_ADDRESS            There was a hole in the region.
14640  */
14641
14642 kern_return_t
14643 vm_map_msync(
14644         vm_map_t                map,
14645         vm_map_address_t        address,
14646         vm_map_size_t           size,
14647         vm_sync_t               sync_flags)
14648 {
14649         msync_req_t             msr;
14650         msync_req_t             new_msr;
14651         queue_chain_t           req_q;  /* queue of requests for this msync */
14652         vm_map_entry_t          entry;
14653         vm_map_size_t           amount_left;
14654         vm_object_offset_t      offset;
14655         boolean_t               do_sync_req;
14656         boolean_t               had_hole = FALSE;
14657         memory_object_t         pager;
14658         vm_map_offset_t         pmap_offset;
14659
14660         if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
14661             (sync_flags & VM_SYNC_SYNCHRONOUS))
14662                 return(KERN_INVALID_ARGUMENT);
14663
14664         /*
14665          * align address and size on page boundaries
14666          */
14667         size = (vm_map_round_page(address + size,
14668                                   VM_MAP_PAGE_MASK(map)) -
14669                 vm_map_trunc_page(address,
14670                                   VM_MAP_PAGE_MASK(map)));
14671         address = vm_map_trunc_page(address,
14672                                     VM_MAP_PAGE_MASK(map));
14673
14674         if (map == VM_MAP_NULL)
14675                 return(KERN_INVALID_TASK);
14676
14677         if (size == 0)
14678                 return(KERN_SUCCESS);
14679
14680         queue_init(&req_q);
14681         amount_left = size;
14682
14683         while (amount_left > 0) {
14684                 vm_object_size_t        flush_size;
14685                 vm_object_t             object;
14686
14687                 vm_map_lock(map);
14688                 if (!vm_map_lookup_entry(map,
14689                                          address,
14690                                          &entry)) {
14691
14692                         vm_map_size_t   skip;
14693
14694                         /*
14695                          * hole in the address map.
14696                          */
14697                         had_hole = TRUE;
14698
14699                         /*
14700                          * Check for empty map.
14701                          */
14702                         if (entry == vm_map_to_entry(map) &&
14703                             entry->vme_next == entry) {
14704                                 vm_map_unlock(map);
14705                                 break;
14706                         }
14707                         /*
14708                          * Check that we don't wrap and that
14709                          * we have at least one real map entry.
14710                          */
14711                         if ((map->hdr.nentries == 0) ||
14712                             (entry->vme_next->vme_start < address)) {
14713                                 vm_map_unlock(map);
14714                                 break;
14715                         }
14716                         /*
14717                          * Move up to the next entry if needed
14718                          */
14719                         skip = (entry->vme_next->vme_start - address);
14720                         if (skip >= amount_left)
14721                                 amount_left = 0;
14722                         else
14723                                 amount_left -= skip;
14724                         address = entry->vme_next->vme_start;
14725                         vm_map_unlock(map);
14726                         continue;
14727                 }
14728
14729                 offset = address - entry->vme_start;
14730                 pmap_offset = address;
14731
14732                 /*
14733                  * do we have more to flush than is contained in this
14734                  * entry ?
14735                  */
14736                 if (amount_left + entry->vme_start + offset > entry->vme_end) {
14737                         flush_size = entry->vme_end -
14738                                 (entry->vme_start + offset);
14739                 } else {
14740                         flush_size = amount_left;
14741                 }
14742                 amount_left -= flush_size;
14743                 address += flush_size;
14744
14745                 if (entry->is_sub_map == TRUE) {
14746                         vm_map_t        local_map;
14747                         vm_map_offset_t local_offset;
14748
14749                         local_map = VME_SUBMAP(entry);
14750                         local_offset = VME_OFFSET(entry);
14751                         vm_map_unlock(map);
14752                         if (vm_map_msync(
14753                                     local_map,
14754                                     local_offset,
14755                                     flush_size,
14756                                     sync_flags) == KERN_INVALID_ADDRESS) {
14757                                 had_hole = TRUE;
14758                         }
14759                         continue;
14760                 }
14761                 object = VME_OBJECT(entry);
14762
14763                 /*
14764                  * We can't sync this object if the object has not been
14765                  * created yet
14766                  */
14767                 if (object == VM_OBJECT_NULL) {
14768                         vm_map_unlock(map);
14769                         continue;
14770                 }
14771                 offset += VME_OFFSET(entry);
14772
14773                 vm_object_lock(object);
14774
14775                 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
14776                         int kill_pages = 0;
14777                         boolean_t reusable_pages = FALSE;
14778
14779                         if (sync_flags & VM_SYNC_KILLPAGES) {
14780                                 if (object->ref_count == 1 && !object->shadow)
14781                                         kill_pages = 1;
14782                                 else
14783                                         kill_pages = -1;
14784                         }
14785                         if (kill_pages != -1)
14786                                 vm_object_deactivate_pages(
14787                                         object,
14788                                         offset,
14789                                         (vm_object_size_t) flush_size,
14790                                         kill_pages,
14791                                         reusable_pages,
14792                                         map->pmap,
14793                                         pmap_offset);
14794                         vm_object_unlock(object);
14795                         vm_map_unlock(map);
14796                         continue;
14797                 }
14798                 /*
14799                  * We can't sync this object if there isn't a pager.
14800                  * Don't bother to sync internal objects, since there can't
14801                  * be any "permanent" storage for these objects anyway.
14802                  */
14803                 if ((object->pager == MEMORY_OBJECT_NULL) ||
14804                     (object->internal) || (object->private)) {
14805                         vm_object_unlock(object);
14806                         vm_map_unlock(map);
14807                         continue;
14808                 }
14809                 /*
14810                  * keep reference on the object until syncing is done
14811                  */
14812                 vm_object_reference_locked(object);
14813                 vm_object_unlock(object);
14814
14815                 vm_map_unlock(map);
14816
14817                 do_sync_req = vm_object_sync(object,
14818                                              offset,
14819                                              flush_size,
14820                                              sync_flags & VM_SYNC_INVALIDATE,
14821                                              ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
14822                                               (sync_flags & VM_SYNC_ASYNCHRONOUS)),
14823                                              sync_flags & VM_SYNC_SYNCHRONOUS);
14824                 /*
14825                  * only send a m_o_s if we returned pages or if the entry
14826                  * is writable (ie dirty pages may have already been sent back)
14827                  */
14828                 if (!do_sync_req) {
14829                         if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
14830                                 /*
14831                                  * clear out the clustering and read-ahead hints
14832                                  */
14833                                 vm_object_lock(object);
14834
14835                                 object->pages_created = 0;
14836                                 object->pages_used = 0;
14837                                 object->sequential = 0;
14838                                 object->last_alloc = 0;
14839
14840                                 vm_object_unlock(object);
14841                         }
14842                         vm_object_deallocate(object);
14843                         continue;
14844                 }
14845                 msync_req_alloc(new_msr);
14846
14847                 vm_object_lock(object);
14848                 offset += object->paging_offset;
14849
14850                 new_msr->offset = offset;
14851                 new_msr->length = flush_size;
14852                 new_msr->object = object;
14853                 new_msr->flag = VM_MSYNC_SYNCHRONIZING;
14854         re_iterate:
14855
14856                 /*
14857                  * We can't sync this object if there isn't a pager.  The
14858                  * pager can disappear anytime we're not holding the object
14859                  * lock.  So this has to be checked anytime we goto re_iterate.
14860                  */
14861
14862                 pager = object->pager;
14863
14864                 if (pager == MEMORY_OBJECT_NULL) {
14865                         vm_object_unlock(object);
14866                         vm_object_deallocate(object);
14867                         msync_req_free(new_msr);
14868                         new_msr = NULL;
14869                         continue;
14870                 }
14871
14872                 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
14873                         /*
14874                          * need to check for overlapping entry, if found, wait
14875                          * on overlapping msr to be done, then reiterate
14876                          */
14877                         msr_lock(msr);
14878                         if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
14879                             ((offset >= msr->offset &&
14880                               offset < (msr->offset + msr->length)) ||
14881                              (msr->offset >= offset &&
14882                               msr->offset < (offset + flush_size))))
14883                         {
14884                                 assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
14885                                 msr_unlock(msr);
14886                                 vm_object_unlock(object);
14887                                 thread_block(THREAD_CONTINUE_NULL);
14888                                 vm_object_lock(object);
14889                                 goto re_iterate;
14890                         }
14891                         msr_unlock(msr);
14892                 }/* queue_iterate */
14893
14894                 queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
14895
14896                 vm_object_paging_begin(object);
14897                 vm_object_unlock(object);
14898
14899                 queue_enter(&req_q, new_msr, msync_req_t, req_q);
14900
14901                 (void) memory_object_synchronize(
14902                         pager,
14903                         offset,
14904                         flush_size,
14905                         sync_flags & ~VM_SYNC_CONTIGUOUS);
14906
14907                 vm_object_lock(object);
14908                 vm_object_paging_end(object);
14909                 vm_object_unlock(object);
14910         }/* while */
14911
14912         /*
14913          * wait for memory_object_sychronize_completed messages from pager(s)
14914          */
14915
14916         while (!queue_empty(&req_q)) {
14917                 msr = (msync_req_t)queue_first(&req_q);
14918                 msr_lock(msr);
14919                 while(msr->flag != VM_MSYNC_DONE) {
14920                         assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
14921                         msr_unlock(msr);
14922                         thread_block(THREAD_CONTINUE_NULL);
14923                         msr_lock(msr);
14924                 }/* while */
14925                 queue_remove(&req_q, msr, msync_req_t, req_q);
14926                 msr_unlock(msr);
14927                 vm_object_deallocate(msr->object);
14928                 msync_req_free(msr);
14929         }/* queue_iterate */
14930
14931         /* for proper msync() behaviour */
14932         if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
14933                 return(KERN_INVALID_ADDRESS);
14934
14935         return(KERN_SUCCESS);
14936 }/* vm_msync */
14937
14938 /*
14939  *      Routine:        convert_port_entry_to_map
14940  *      Purpose:
14941  *              Convert from a port specifying an entry or a task
14942  *              to a map. Doesn't consume the port ref; produces a map ref,
14943  *              which may be null.  Unlike convert_port_to_map, the
14944  *              port may be task or a named entry backed.
14945  *      Conditions:
14946  *              Nothing locked.
14947  */
14948
14949
14950 vm_map_t
14951 convert_port_entry_to_map(
14952         ipc_port_t      port)
14953 {
14954         vm_map_t map;
14955         vm_named_entry_t        named_entry;
14956         uint32_t        try_failed_count = 0;
14957
14958         if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
14959                 while(TRUE) {
14960                         ip_lock(port);
14961                         if(ip_active(port) && (ip_kotype(port)
14962                                                == IKOT_NAMED_ENTRY)) {
14963                                 named_entry =
14964                                         (vm_named_entry_t)port->ip_kobject;
14965                                 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
14966                                         ip_unlock(port);
14967
14968                                         try_failed_count++;
14969                                         mutex_pause(try_failed_count);
14970                                         continue;
14971                                 }
14972                                 named_entry->ref_count++;
14973                                 lck_mtx_unlock(&(named_entry)->Lock);
14974                                 ip_unlock(port);
14975                                 if ((named_entry->is_sub_map) &&
14976                                     (named_entry->protection
14977                                      & VM_PROT_WRITE)) {
14978                                         map = named_entry->backing.map;
14979                                 } else {
14980                                         mach_destroy_memory_entry(port);
14981                                         return VM_MAP_NULL;
14982                                 }
14983                                 vm_map_reference_swap(map);
14984                                 mach_destroy_memory_entry(port);
14985                                 break;
14986                         }
14987                         else
14988                                 return VM_MAP_NULL;
14989                 }
14990         }
14991         else
14992                 map = convert_port_to_map(port);
14993
14994         return map;
14995 }
14996
14997 /*
14998  *      Routine:        convert_port_entry_to_object
14999  *      Purpose:
15000  *              Convert from a port specifying a named entry to an
15001  *              object. Doesn't consume the port ref; produces a map ref,
15002  *              which may be null.
15003  *      Conditions:
15004  *              Nothing locked.
15005  */
15006
15007
15008 vm_object_t
15009 convert_port_entry_to_object(
15010         ipc_port_t      port)
15011 {
15012         vm_object_t             object = VM_OBJECT_NULL;
15013         vm_named_entry_t        named_entry;
15014         uint32_t                try_failed_count = 0;
15015
15016         if (IP_VALID(port) &&
15017             (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
15018         try_again:
15019                 ip_lock(port);
15020                 if (ip_active(port) &&
15021                     (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
15022                         named_entry = (vm_named_entry_t)port->ip_kobject;
15023                         if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
15024                                 ip_unlock(port);
15025                                 try_failed_count++;
15026                                 mutex_pause(try_failed_count);
15027                                 goto try_again;
15028                         }
15029                         named_entry->ref_count++;
15030                         lck_mtx_unlock(&(named_entry)->Lock);
15031                         ip_unlock(port);
15032                         if (!(named_entry->is_sub_map) &&
15033                             !(named_entry->is_pager) &&
15034                             !(named_entry->is_copy) &&
15035                             (named_entry->protection & VM_PROT_WRITE)) {
15036                                 object = named_entry->backing.object;
15037                                 vm_object_reference(object);
15038                         }
15039                         mach_destroy_memory_entry(port);
15040                 }
15041         }
15042
15043         return object;
15044 }
15045
15046 /*
15047  * Export routines to other components for the things we access locally through
15048  * macros.
15049  */
15050 #undef current_map
15051 vm_map_t
15052 current_map(void)
15053 {
15054         return (current_map_fast());
15055 }
15056
15057 /*
15058  *      vm_map_reference:
15059  *
15060  *      Most code internal to the osfmk will go through a
15061  *      macro defining this.  This is always here for the
15062  *      use of other kernel components.
15063  */
15064 #undef vm_map_reference
15065 void
15066 vm_map_reference(
15067         register vm_map_t       map)
15068 {
15069         if (map == VM_MAP_NULL)
15070                 return;
15071
15072         lck_mtx_lock(&map->s_lock);
15073 #if     TASK_SWAPPER
15074         assert(map->res_count > 0);
15075         assert(map->ref_count >= map->res_count);
15076         map->res_count++;
15077 #endif
15078         map->ref_count++;
15079         lck_mtx_unlock(&map->s_lock);
15080 }
15081
15082 /*
15083  *      vm_map_deallocate:
15084  *
15085  *      Removes a reference from the specified map,
15086  *      destroying it if no references remain.
15087  *      The map should not be locked.
15088  */
15089 void
15090 vm_map_deallocate(
15091         register vm_map_t       map)
15092 {
15093         unsigned int            ref;
15094
15095         if (map == VM_MAP_NULL)
15096                 return;
15097
15098         lck_mtx_lock(&map->s_lock);
15099         ref = --map->ref_count;
15100         if (ref > 0) {
15101                 vm_map_res_deallocate(map);
15102                 lck_mtx_unlock(&map->s_lock);
15103                 return;
15104         }
15105         assert(map->ref_count == 0);
15106         lck_mtx_unlock(&map->s_lock);
15107
15108 #if     TASK_SWAPPER
15109         /*
15110          * The map residence count isn't decremented here because
15111          * the vm_map_delete below will traverse the entire map,
15112          * deleting entries, and the residence counts on objects
15113          * and sharing maps will go away then.
15114          */
15115 #endif
15116
15117         vm_map_destroy(map, VM_MAP_NO_FLAGS);
15118 }
15119
15120
15121 void
15122 vm_map_disable_NX(vm_map_t map)
15123 {
15124         if (map == NULL)
15125                 return;
15126         if (map->pmap == NULL)
15127                 return;
15128
15129         pmap_disable_NX(map->pmap);
15130 }
15131
15132 void
15133 vm_map_disallow_data_exec(vm_map_t map)
15134 {
15135     if (map == NULL)
15136         return;
15137
15138     map->map_disallow_data_exec = TRUE;
15139 }
15140
15141 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
15142  * more descriptive.
15143  */
15144 void
15145 vm_map_set_32bit(vm_map_t map)
15146 {
15147         map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
15148 }
15149
15150
15151 void
15152 vm_map_set_64bit(vm_map_t map)
15153 {
15154         map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
15155 }
15156
15157 vm_map_offset_t
15158 vm_compute_max_offset(boolean_t is64)
15159 {
15160         return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
15161 }
15162
15163 uint64_t
15164 vm_map_get_max_aslr_slide_pages(vm_map_t map)
15165 {
15166         return (1 << (vm_map_is_64bit(map) ? 16 : 8));
15167 }
15168
15169 boolean_t
15170 vm_map_is_64bit(
15171                 vm_map_t map)
15172 {
15173         return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
15174 }
15175
15176 boolean_t
15177 vm_map_has_hard_pagezero(
15178                 vm_map_t        map,
15179                 vm_map_offset_t pagezero_size)
15180 {
15181         /*
15182          * XXX FBDP
15183          * We should lock the VM map (for read) here but we can get away
15184          * with it for now because there can't really be any race condition:
15185          * the VM map's min_offset is changed only when the VM map is created
15186          * and when the zero page is established (when the binary gets loaded),
15187          * and this routine gets called only when the task terminates and the
15188          * VM map is being torn down, and when a new map is created via
15189          * load_machfile()/execve().
15190          */
15191         return (map->min_offset >= pagezero_size);
15192 }
15193
15194 /*
15195  * Raise a VM map's maximun offset.
15196  */
15197 kern_return_t
15198 vm_map_raise_max_offset(
15199         vm_map_t        map,
15200         vm_map_offset_t new_max_offset)
15201 {
15202         kern_return_t   ret;
15203
15204         vm_map_lock(map);
15205         ret = KERN_INVALID_ADDRESS;
15206
15207         if (new_max_offset >= map->max_offset) {
15208                 if (!vm_map_is_64bit(map)) {
15209                         if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
15210                                 map->max_offset = new_max_offset;
15211                                 ret = KERN_SUCCESS;
15212                         }
15213                 } else {
15214                         if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
15215                                 map->max_offset = new_max_offset;
15216                                 ret = KERN_SUCCESS;
15217                         }
15218                 }
15219         }
15220
15221         vm_map_unlock(map);
15222         return ret;
15223 }
15224
15225
15226 /*
15227  * Raise a VM map's minimum offset.
15228  * To strictly enforce "page zero" reservation.
15229  */
15230 kern_return_t
15231 vm_map_raise_min_offset(
15232         vm_map_t        map,
15233         vm_map_offset_t new_min_offset)
15234 {
15235         vm_map_entry_t  first_entry;
15236
15237         new_min_offset = vm_map_round_page(new_min_offset,
15238                                            VM_MAP_PAGE_MASK(map));
15239
15240         vm_map_lock(map);
15241
15242         if (new_min_offset < map->min_offset) {
15243                 /*
15244                  * Can't move min_offset backwards, as that would expose
15245                  * a part of the address space that was previously, and for
15246                  * possibly good reasons, inaccessible.
15247                  */
15248                 vm_map_unlock(map);
15249                 return KERN_INVALID_ADDRESS;
15250         }
15251         if (new_min_offset >= map->max_offset) {
15252                 /* can't go beyond the end of the address space */
15253                 vm_map_unlock(map);
15254                 return KERN_INVALID_ADDRESS;
15255         }
15256
15257         first_entry = vm_map_first_entry(map);
15258         if (first_entry != vm_map_to_entry(map) &&
15259             first_entry->vme_start < new_min_offset) {
15260                 /*
15261                  * Some memory was already allocated below the new
15262                  * minimun offset.  It's too late to change it now...
15263                  */
15264                 vm_map_unlock(map);
15265                 return KERN_NO_SPACE;
15266         }
15267
15268         map->min_offset = new_min_offset;
15269
15270         assert(map->holes_list);
15271         map->holes_list->start = new_min_offset;
15272         assert(new_min_offset < map->holes_list->end);
15273
15274         vm_map_unlock(map);
15275
15276         return KERN_SUCCESS;
15277 }
15278
15279 /*
15280  * Set the limit on the maximum amount of user wired memory allowed for this map.
15281  * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
15282  * the kernel.  The limits are checked in the mach VM side, so we keep a copy so we
15283  * don't have to reach over to the BSD data structures.
15284  */
15285
15286 void
15287 vm_map_set_user_wire_limit(vm_map_t     map,
15288                            vm_size_t    limit)
15289 {
15290         map->user_wire_limit = limit;
15291 }
15292
15293
15294 void vm_map_switch_protect(vm_map_t     map,
15295                            boolean_t    val)
15296 {
15297         vm_map_lock(map);
15298         map->switch_protect=val;
15299         vm_map_unlock(map);
15300 }
15301
15302 /*
15303  * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
15304  * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
15305  * bump both counters.
15306  */
15307 void
15308 vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
15309 {
15310         pmap_t pmap = vm_map_pmap(map);
15311
15312         ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
15313         ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
15314 }
15315
15316 void
15317 vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
15318 {
15319         pmap_t pmap = vm_map_pmap(map);
15320
15321         ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
15322         ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
15323 }
15324
/*
 * Add (generate) code signature for memory range.
 *
 * Marks every page backing [start, end) as code-signing validated.  The
 * range must be covered by a single, non-submap map entry that already
 * has a VM object, and every page must be resident and in a normal state.
 *
 * Returns:
 *	KERN_SUCCESS		all pages in the range were marked
 *	KERN_INVALID_ARGUMENT	null map, range not covered by one entry,
 *				or entry has no VM object
 *	KERN_INVALID_ADDRESS	"start" is not mapped or maps a submap
 *	KERN_FAILURE		a page is not resident, or is busy / in
 *				error / restart / private / absent state
 *
 * On KERN_FAILURE, pages visited before the failing one stay marked.
 */
#if CONFIG_DYNAMIC_CODE_SIGNING
kern_return_t vm_map_sign(vm_map_t map,
		 vm_map_offset_t start,
		 vm_map_offset_t end)
{
	vm_map_entry_t entry;
	vm_page_t m;
	vm_object_t object;
	vm_object_offset_t offset;

	/*
	 * Vet all the input parameters and current type and state of the
	 * underlying object.  Return with an error if anything is amiss.
	 */
	if (map == VM_MAP_NULL)
		return(KERN_INVALID_ARGUMENT);

	vm_map_lock_read(map);

	if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
		/*
		 * Must pass a valid non-submap address.
		 */
		vm_map_unlock_read(map);
		return(KERN_INVALID_ADDRESS);
	}

	if((entry->vme_start > start) || (entry->vme_end < end)) {
		/*
		 * Map entry doesn't cover the requested range. Not handling
		 * this situation currently.
		 */
		vm_map_unlock_read(map);
		return(KERN_INVALID_ARGUMENT);
	}

	object = VME_OBJECT(entry);
	if (object == VM_OBJECT_NULL) {
		/*
		 * Object must already be present or we can't sign.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * Translate "start" into an object offset while the map is still
	 * locked: once the map lock is dropped, "entry" may be changed or
	 * freed by another thread and must not be dereferenced again.
	 */
	offset = start - entry->vme_start + VME_OFFSET(entry);

	vm_object_lock(object);
	vm_map_unlock_read(map);

	while(start < end) {
		uint32_t refmod;

		m = vm_page_lookup(object, offset);
		if (m==VM_PAGE_NULL) {
			/* should we try to fault a page here? we can probably
			 * demand it exists and is locked for this request */
			vm_object_unlock(object);
			return KERN_FAILURE;
		}
		/* deal with special page status */
		if (m->busy ||
		    (m->unusual && (m->error || m->restart || m->private || m->absent))) {
			vm_object_unlock(object);
			return KERN_FAILURE;
		}

		/* Page is OK... now "validate" it */
		/* This is the place where we'll call out to create a code
		 * directory, later */
		m->cs_validated = TRUE;

		/* The page is now "clean" for codesigning purposes. That means
		 * we don't consider it as modified (wpmapped) anymore. But
		 * we'll disconnect the page so we note any future modification
		 * attempts. */
		m->wpmapped = FALSE;
		refmod = pmap_disconnect(m->phys_page);

		/* Pull the dirty status from the pmap, since we cleared the
		 * wpmapped bit */
		if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
			SET_PAGE_DIRTY(m, FALSE);
		}

		/* On to the next page */
		start += PAGE_SIZE;
		offset += PAGE_SIZE;
	}
	vm_object_unlock(object);

	return KERN_SUCCESS;
}
#endif
15417
15418 kern_return_t vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
15419 {
15420         vm_map_entry_t  entry = VM_MAP_ENTRY_NULL;
15421         vm_map_entry_t next_entry;
15422         kern_return_t   kr = KERN_SUCCESS;
15423         vm_map_t        zap_map;
15424
15425         vm_map_lock(map);
15426
15427         /*
15428          * We use a "zap_map" to avoid having to unlock
15429          * the "map" in vm_map_delete().
15430          */
15431         zap_map = vm_map_create(PMAP_NULL,
15432                                 map->min_offset,
15433                                 map->max_offset,
15434                                 map->hdr.entries_pageable);
15435
15436         if (zap_map == VM_MAP_NULL) {
15437                 return KERN_RESOURCE_SHORTAGE;
15438         }
15439
15440         vm_map_set_page_shift(zap_map,
15441                               VM_MAP_PAGE_SHIFT(map));
15442         vm_map_disable_hole_optimization(zap_map);
15443
15444         for (entry = vm_map_first_entry(map);
15445              entry != vm_map_to_entry(map);
15446              entry = next_entry) {
15447                 next_entry = entry->vme_next;
15448
15449                 if (VME_OBJECT(entry) &&
15450                     !entry->is_sub_map &&
15451                     (VME_OBJECT(entry)->internal == TRUE) &&
15452                     (VME_OBJECT(entry)->ref_count == 1)) {
15453
15454                         *reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
15455                         *reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);
15456
15457                         (void)vm_map_delete(map,
15458                                             entry->vme_start,
15459                                             entry->vme_end,
15460                                             VM_MAP_REMOVE_SAVE_ENTRIES,
15461                                             zap_map);
15462                 }
15463         }
15464
15465         vm_map_unlock(map);
15466
15467         /*
15468          * Get rid of the "zap_maps" and all the map entries that
15469          * they may still contain.
15470          */
15471         if (zap_map != VM_MAP_NULL) {
15472                 vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
15473                 zap_map = VM_MAP_NULL;
15474         }
15475
15476         return kr;
15477 }
15478
15479 #if CONFIG_FREEZE
15480
15481 kern_return_t vm_map_freeze_walk(
15482                 vm_map_t map,
15483                 unsigned int *purgeable_count,
15484                 unsigned int *wired_count,
15485                 unsigned int *clean_count,
15486                 unsigned int *dirty_count,
15487                 unsigned int  dirty_budget,
15488                 boolean_t *has_shared)
15489 {
15490         vm_map_entry_t entry;
15491
15492         vm_map_lock_read(map);
15493
15494         *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
15495         *has_shared = FALSE;
15496
15497         for (entry = vm_map_first_entry(map);
15498              entry != vm_map_to_entry(map);
15499              entry = entry->vme_next) {
15500                 unsigned int purgeable, clean, dirty, wired;
15501                 boolean_t shared;
15502
15503                 if ((VME_OBJECT(entry) == 0) ||
15504                     (entry->is_sub_map) ||
15505                     (VME_OBJECT(entry)->phys_contiguous)) {
15506                         continue;
15507                 }
15508
15509                 default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared, VME_OBJECT(entry), NULL);
15510
15511                 *purgeable_count += purgeable;
15512                 *wired_count += wired;
15513                 *clean_count += clean;
15514                 *dirty_count += dirty;
15515
15516                 if (shared) {
15517                         *has_shared = TRUE;
15518                 }
15519
15520                 /* Adjust pageout budget and finish up if reached */
15521                 if (dirty_budget) {
15522                         dirty_budget -= dirty;
15523                         if (dirty_budget == 0) {
15524                                 break;
15525                         }
15526                 }
15527         }
15528
15529         vm_map_unlock_read(map);
15530
15531         return KERN_SUCCESS;
15532 }
15533
/*
 * Freezer bookkeeping globals.
 *
 * c_freezer_swapout_count: reset to 0 at the end of vm_map_freeze() when
 * the freezer's compressed pager is swap-backed; per the comment there it
 * tracks the number of swapped c_segs for the current freeze session.
 * NOTE(review): it is not incremented anywhere in the visible code --
 * presumably the compressor/swap code does that; confirm.
 */
int c_freezer_swapout_count;
/* Reset to 0 by vm_map_freeze() just before it starts walking the map. */
int c_freezer_compression_count = 0;
/* Set to the current uptime by vm_map_freeze() before it walks the map. */
AbsoluteTime c_freezer_last_yield_ts = 0;
15537
15538 kern_return_t vm_map_freeze(
15539                 vm_map_t map,
15540                 unsigned int *purgeable_count,
15541                 unsigned int *wired_count,
15542                 unsigned int *clean_count,
15543                 unsigned int *dirty_count,
15544                 unsigned int dirty_budget,
15545                 boolean_t *has_shared)
15546 {
15547         vm_map_entry_t  entry2 = VM_MAP_ENTRY_NULL;
15548         kern_return_t   kr = KERN_SUCCESS;
15549         boolean_t       default_freezer_active = TRUE;
15550
15551         *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
15552         *has_shared = FALSE;
15553
15554         /*
15555          * We need the exclusive lock here so that we can
15556          * block any page faults or lookups while we are
15557          * in the middle of freezing this vm map.
15558          */
15559         vm_map_lock(map);
15560
15561         if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
15562                 default_freezer_active = FALSE;
15563
15564                 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
15565                         kr = KERN_NO_SPACE;
15566                         goto done;
15567                 }
15568         }
15569         assert(default_freezer_active == FALSE);
15570
15571         if (default_freezer_active) {
15572                 if (map->default_freezer_handle == NULL) {
15573                         map->default_freezer_handle = default_freezer_handle_allocate();
15574                 }
15575
15576                 if ((kr = default_freezer_handle_init(map->default_freezer_handle)) != KERN_SUCCESS) {
15577                         /*
15578                          * Can happen if default_freezer_handle passed in is NULL
15579                          * Or, a table has already been allocated and associated
15580                          * with this handle, i.e. the map is already frozen.
15581                          */
15582                         goto done;
15583                 }
15584         }
15585         c_freezer_compression_count = 0;
15586         clock_get_uptime(&c_freezer_last_yield_ts);
15587
15588         for (entry2 = vm_map_first_entry(map);
15589              entry2 != vm_map_to_entry(map);
15590              entry2 = entry2->vme_next) {
15591
15592                 vm_object_t     src_object = VME_OBJECT(entry2);
15593
15594                 if (VME_OBJECT(entry2) &&
15595                     !entry2->is_sub_map &&
15596                     !VME_OBJECT(entry2)->phys_contiguous) {
15597                         /* If eligible, scan the entry, moving eligible pages over to our parent object */
15598                         if (default_freezer_active) {
15599                                 unsigned int purgeable, clean, dirty, wired;
15600                                 boolean_t shared;
15601
15602                                 default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared,
15603                                                                 src_object, map->default_freezer_handle);
15604
15605                                 *purgeable_count += purgeable;
15606                                 *wired_count += wired;
15607                                 *clean_count += clean;
15608                                 *dirty_count += dirty;
15609
15610                                 /* Adjust pageout budget and finish up if reached */
15611                                 if (dirty_budget) {
15612                                         dirty_budget -= dirty;
15613                                         if (dirty_budget == 0) {
15614                                                 break;
15615                                         }
15616                                 }
15617
15618                                 if (shared) {
15619                                         *has_shared = TRUE;
15620                                 }
15621                         } else {
15622                                 if (VME_OBJECT(entry2)->internal == TRUE) {
15623
15624                                         if (DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) {
15625                                                 /*
15626                                                  * Pages belonging to this object could be swapped to disk.
15627                                                  * Make sure it's not a shared object because we could end
15628                                                  * up just bringing it back in again.
15629                                                  */
15630                                                 if (VME_OBJECT(entry2)->ref_count > 1) {
15631                                                         continue;
15632                                                 }
15633                                         }
15634                                         vm_object_compressed_freezer_pageout(VME_OBJECT(entry2));
15635                                 }
15636
15637                                 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
15638                                         kr = KERN_NO_SPACE;
15639                                         break;
15640                                 }
15641                         }
15642                 }
15643         }
15644
15645         if (default_freezer_active) {
15646                 /* Finally, throw out the pages to swap */
15647                 default_freezer_pageout(map->default_freezer_handle);
15648         }
15649
15650 done:
15651         vm_map_unlock(map);
15652
15653         if (!default_freezer_active) {
15654                 vm_object_compressed_freezer_done();
15655         }
15656         if (DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) {
15657                 /*
15658                  * reset the counter tracking the # of swapped c_segs
15659                  * because we are now done with this freeze session and task.
15660                  */
15661                 c_freezer_swapout_count = 0;
15662         }
15663         return kr;
15664 }
15665
15666 kern_return_t
15667 vm_map_thaw(
15668         vm_map_t map)
15669 {
15670         kern_return_t kr = KERN_SUCCESS;
15671
15672         if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
15673                 /*
15674                  * We will on-demand thaw in the presence of the compressed pager.
15675                  */
15676                 return kr;
15677         }
15678
15679         vm_map_lock(map);
15680
15681         if (map->default_freezer_handle == NULL) {
15682                 /*
15683                  * This map is not in a frozen state.
15684                  */
15685                 kr = KERN_FAILURE;
15686                 goto out;
15687         }
15688
15689         kr = default_freezer_unpack(map->default_freezer_handle);
15690 out:
15691         vm_map_unlock(map);
15692
15693         return kr;
15694 }
15695 #endif
15696
15697 /*
15698  * vm_map_entry_should_cow_for_true_share:
15699  *
15700  * Determines if the map entry should be clipped and setup for copy-on-write
15701  * to avoid applying "true_share" to a large VM object when only a subset is
15702  * targeted.
15703  *
15704  * For now, we target only the map entries created for the Objective C
15705  * Garbage Collector, which initially have the following properties:
15706  *      - alias == VM_MEMORY_MALLOC
15707  *      - wired_count == 0
15708  *      - !needs_copy
15709  * and a VM object with:
15710  *      - internal
15711  *      - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
15712  *      - !true_share
15713  *      - vo_size == ANON_CHUNK_SIZE
15714  *
15715  * Only non-kernel map entries.
15716  */
15717 boolean_t
15718 vm_map_entry_should_cow_for_true_share(
15719         vm_map_entry_t  entry)
15720 {
15721         vm_object_t     object;
15722
15723         if (entry->is_sub_map) {
15724                 /* entry does not point at a VM object */
15725                 return FALSE;
15726         }
15727
15728         if (entry->needs_copy) {
15729                 /* already set for copy_on_write: done! */
15730                 return FALSE;
15731         }
15732
15733         if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
15734             VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
15735                 /* not a malloc heap or Obj-C Garbage Collector heap */
15736                 return FALSE;
15737         }
15738
15739         if (entry->wired_count) {
15740                 /* wired: can't change the map entry... */
15741                 vm_counters.should_cow_but_wired++;
15742                 return FALSE;
15743         }
15744
15745         object = VME_OBJECT(entry);
15746
15747         if (object == VM_OBJECT_NULL) {
15748                 /* no object yet... */
15749                 return FALSE;
15750         }
15751
15752         if (!object->internal) {
15753                 /* not an internal object */
15754                 return FALSE;
15755         }
15756
15757         if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
15758                 /* not the default copy strategy */
15759                 return FALSE;
15760         }
15761
15762         if (object->true_share) {
15763                 /* already true_share: too late to avoid it */
15764                 return FALSE;
15765         }
15766
15767         if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
15768             object->vo_size != ANON_CHUNK_SIZE) {
15769                 /* ... not an object created for the ObjC Garbage Collector */
15770                 return FALSE;
15771         }
15772
15773         if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
15774             object->vo_size != 2048 * 4096) {
15775                 /* ... not a "MALLOC_SMALL" heap */
15776                 return FALSE;
15777         }
15778
15779         /*
15780          * All the criteria match: we have a large object being targeted for "true_share".
15781          * To limit the adverse side-effects linked with "true_share", tell the caller to
15782          * try and avoid setting up the entire object for "true_share" by clipping the
15783          * targeted range and setting it up for copy-on-write.
15784          */
15785         return TRUE;
15786 }
15787
15788 vm_map_offset_t
15789 vm_map_round_page_mask(
15790         vm_map_offset_t offset,
15791         vm_map_offset_t mask)
15792 {
15793         return VM_MAP_ROUND_PAGE(offset, mask);
15794 }
15795
15796 vm_map_offset_t
15797 vm_map_trunc_page_mask(
15798         vm_map_offset_t offset,
15799         vm_map_offset_t mask)
15800 {
15801         return VM_MAP_TRUNC_PAGE(offset, mask);
15802 }
15803
15804 boolean_t
15805 vm_map_page_aligned(
15806         vm_map_offset_t offset,
15807         vm_map_offset_t mask)
15808 {
15809         return ((offset) & mask) == 0;
15810 }
15811
15812 int
15813 vm_map_page_shift(
15814         vm_map_t map)
15815 {
15816         return VM_MAP_PAGE_SHIFT(map);
15817 }
15818
15819 int
15820 vm_map_page_size(
15821         vm_map_t map)
15822 {
15823         return VM_MAP_PAGE_SIZE(map);
15824 }
15825
15826 vm_map_offset_t
15827 vm_map_page_mask(
15828         vm_map_t map)
15829 {
15830         return VM_MAP_PAGE_MASK(map);
15831 }
15832
15833 kern_return_t
15834 vm_map_set_page_shift(
15835         vm_map_t        map,
15836         int             pageshift)
15837 {
15838         if (map->hdr.nentries != 0) {
15839                 /* too late to change page size */
15840                 return KERN_FAILURE;
15841         }
15842
15843         map->hdr.page_shift = pageshift;
15844
15845         return KERN_SUCCESS;
15846 }
15847
15848 int
15849 vm_map_purge(
15850         vm_map_t        map)
15851 {
15852         int             num_object_purged;
15853         vm_map_entry_t  entry;
15854         vm_map_offset_t next_address;
15855         vm_object_t     object;
15856         int             state;
15857         kern_return_t   kr;
15858
15859         num_object_purged = 0;
15860
15861         vm_map_lock_read(map);
15862         entry = vm_map_first_entry(map);
15863         while (entry != vm_map_to_entry(map)) {
15864                 if (entry->is_sub_map) {
15865                         goto next;
15866                 }
15867                 if (! (entry->protection & VM_PROT_WRITE)) {
15868                         goto next;
15869                 }
15870                 object = VME_OBJECT(entry);
15871                 if (object == VM_OBJECT_NULL) {
15872                         goto next;
15873                 }
15874                 if (object->purgable != VM_PURGABLE_VOLATILE) {
15875                         goto next;
15876                 }
15877
15878                 vm_object_lock(object);
15879 #if 00
15880                 if (VME_OFFSET(entry) != 0 ||
15881                     (entry->vme_end - entry->vme_start) != object->vo_size) {
15882                         vm_object_unlock(object);
15883                         goto next;
15884                 }
15885 #endif
15886                 next_address = entry->vme_end;
15887                 vm_map_unlock_read(map);
15888                 state = VM_PURGABLE_EMPTY;
15889                 kr = vm_object_purgable_control(object,
15890                                                 VM_PURGABLE_SET_STATE,
15891                                                 &state);
15892                 if (kr == KERN_SUCCESS) {
15893                         num_object_purged++;
15894                 }
15895                 vm_object_unlock(object);
15896
15897                 vm_map_lock_read(map);
15898                 if (vm_map_lookup_entry(map, next_address, &entry)) {
15899                         continue;
15900                 }
15901         next:
15902                 entry = entry->vme_next;
15903         }
15904         vm_map_unlock_read(map);
15905
15906         return num_object_purged;
15907 }
15908
15909 kern_return_t
15910 vm_map_query_volatile(
15911         vm_map_t        map,
15912         mach_vm_size_t  *volatile_virtual_size_p,
15913         mach_vm_size_t  *volatile_resident_size_p,
15914         mach_vm_size_t  *volatile_compressed_size_p,
15915         mach_vm_size_t  *volatile_pmap_size_p,
15916         mach_vm_size_t  *volatile_compressed_pmap_size_p)
15917 {
15918         mach_vm_size_t  volatile_virtual_size;
15919         mach_vm_size_t  volatile_resident_count;
15920         mach_vm_size_t  volatile_compressed_count;
15921         mach_vm_size_t  volatile_pmap_count;
15922         mach_vm_size_t  volatile_compressed_pmap_count;
15923         mach_vm_size_t  resident_count;
15924         vm_map_entry_t  entry;
15925         vm_object_t     object;
15926
15927         /* map should be locked by caller */
15928
15929         volatile_virtual_size = 0;
15930         volatile_resident_count = 0;
15931         volatile_compressed_count = 0;
15932         volatile_pmap_count = 0;
15933         volatile_compressed_pmap_count = 0;
15934
15935         for (entry = vm_map_first_entry(map);
15936              entry != vm_map_to_entry(map);
15937              entry = entry->vme_next) {
15938                 mach_vm_size_t  pmap_resident_bytes, pmap_compressed_bytes;
15939
15940                 if (entry->is_sub_map) {
15941                         continue;
15942                 }
15943                 if (! (entry->protection & VM_PROT_WRITE)) {
15944                         continue;
15945                 }
15946                 object = VME_OBJECT(entry);
15947                 if (object == VM_OBJECT_NULL) {
15948                         continue;
15949                 }
15950                 if (object->purgable != VM_PURGABLE_VOLATILE &&
15951                     object->purgable != VM_PURGABLE_EMPTY) {
15952                         continue;
15953                 }
15954                 if (VME_OFFSET(entry)) {
15955                         /*
15956                          * If the map entry has been split and the object now
15957                          * appears several times in the VM map, we don't want
15958                          * to count the object's resident_page_count more than
15959                          * once.  We count it only for the first one, starting
15960                          * at offset 0 and ignore the other VM map entries.
15961                          */
15962                         continue;
15963                 }
15964                 resident_count = object->resident_page_count;
15965                 if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
15966                         resident_count = 0;
15967                 } else {
15968                         resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
15969                 }
15970
15971                 volatile_virtual_size += entry->vme_end - entry->vme_start;
15972                 volatile_resident_count += resident_count;
15973                 if (object->pager) {
15974                         volatile_compressed_count +=
15975                                 vm_compressor_pager_get_count(object->pager);
15976                 }
15977                 pmap_compressed_bytes = 0;
15978                 pmap_resident_bytes =
15979                         pmap_query_resident(map->pmap,
15980                                             entry->vme_start,
15981                                             entry->vme_end,
15982                                             &pmap_compressed_bytes);
15983                 volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
15984                 volatile_compressed_pmap_count += (pmap_compressed_bytes
15985                                                    / PAGE_SIZE);
15986         }
15987
15988         /* map is still locked on return */
15989
15990         *volatile_virtual_size_p = volatile_virtual_size;
15991         *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
15992         *volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
15993         *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
15994         *volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;
15995
15996         return KERN_SUCCESS;
15997 }
15998
15999 void
16000 vm_map_sizes(vm_map_t map,
16001                 vm_map_size_t * psize,
16002                 vm_map_size_t * pfree,
16003                 vm_map_size_t * plargest_free)
16004 {
16005     vm_map_entry_t  entry;
16006     vm_map_offset_t prev;
16007     vm_map_size_t   free, total_free, largest_free;
16008     boolean_t       end;
16009
16010     total_free = largest_free = 0;
16011
16012     vm_map_lock_read(map);
16013     if (psize) *psize = map->max_offset - map->min_offset;
16014
16015     prev = map->min_offset;
16016     for (entry = vm_map_first_entry(map);; entry = entry->vme_next)
16017     {
16018         end = (entry == vm_map_to_entry(map));
16019
16020         if (end) free = entry->vme_end   - prev;
16021         else     free = entry->vme_start - prev;
16022
16023         total_free += free;
16024         if (free > largest_free) largest_free = free;
16025
16026         if (end) break;
16027         prev = entry->vme_end;
16028     }
16029     vm_map_unlock_read(map);
16030     if (pfree)         *pfree = total_free;
16031     if (plargest_free) *plargest_free = largest_free;
16032 }
16033
16034 #if VM_SCAN_FOR_SHADOW_CHAIN
16035 int vm_map_shadow_max(vm_map_t map);
16036 int vm_map_shadow_max(
16037         vm_map_t map)
16038 {
16039         int             shadows, shadows_max;
16040         vm_map_entry_t  entry;
16041         vm_object_t     object, next_object;
16042
16043         if (map == NULL)
16044                 return 0;
16045
16046         shadows_max = 0;
16047
16048         vm_map_lock_read(map);
16049
16050         for (entry = vm_map_first_entry(map);
16051              entry != vm_map_to_entry(map);
16052              entry = entry->vme_next) {
16053                 if (entry->is_sub_map) {
16054                         continue;
16055                 }
16056                 object = VME_OBJECT(entry);
16057                 if (object == NULL) {
16058                         continue;
16059                 }
16060                 vm_object_lock_shared(object);
16061                 for (shadows = 0;
16062                      object->shadow != NULL;
16063                      shadows++, object = next_object) {
16064                         next_object = object->shadow;
16065                         vm_object_lock_shared(next_object);
16066                         vm_object_unlock(object);
16067                 }
16068                 vm_object_unlock(object);
16069                 if (shadows > shadows_max) {
16070                         shadows_max = shadows;
16071                 }
16072         }
16073
16074         vm_map_unlock_read(map);
16075
16076         return shadows_max;
16077 }
16078 #endif /* VM_SCAN_FOR_SHADOW_CHAIN */