1 /*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66 #include <task_swapper.h>
67 #include <mach_assert.h>
68
69 #include <vm/vm_options.h>
70
71 #include <libkern/OSAtomic.h>
72
73 #include <mach/kern_return.h>
74 #include <mach/port.h>
75 #include <mach/vm_attributes.h>
76 #include <mach/vm_param.h>
77 #include <mach/vm_behavior.h>
78 #include <mach/vm_statistics.h>
79 #include <mach/memory_object.h>
80 #include <mach/mach_vm.h>
81 #include <machine/cpu_capabilities.h>
82 #include <mach/sdt.h>
83
84 #include <kern/assert.h>
85 #include <kern/counters.h>
86 #include <kern/kalloc.h>
87 #include <kern/zalloc.h>
88
89 #include <vm/cpm.h>
90 #include <vm/vm_compressor_pager.h>
91 #include <vm/vm_init.h>
92 #include <vm/vm_fault.h>
93 #include <vm/vm_map.h>
94 #include <vm/vm_object.h>
95 #include <vm/vm_page.h>
96 #include <vm/vm_pageout.h>
97 #include <vm/vm_kern.h>
98 #include <ipc/ipc_port.h>
99 #include <kern/sched_prim.h>
100 #include <kern/misc_protos.h>
101 #include <kern/xpr.h>
102
103 #include <mach/vm_map_server.h>
104 #include <mach/mach_host_server.h>
105 #include <vm/vm_protos.h>
106 #include <vm/vm_purgeable_internal.h>
107
108 #include <vm/vm_protos.h>
109 #include <vm/vm_shared_region.h>
110 #include <vm/vm_map_store.h>
111
112 extern u_int32_t random(void); /* from <libkern/libkern.h> */
113 /* Internal prototypes
114 */
115
116 static void vm_map_simplify_range(
117 vm_map_t map,
118 vm_map_offset_t start,
119 vm_map_offset_t end); /* forward */
120
121 static boolean_t vm_map_range_check(
122 vm_map_t map,
123 vm_map_offset_t start,
124 vm_map_offset_t end,
125 vm_map_entry_t *entry);
126
127 static vm_map_entry_t _vm_map_entry_create(
128 struct vm_map_header *map_header, boolean_t map_locked);
129
130 static void _vm_map_entry_dispose(
131 struct vm_map_header *map_header,
132 vm_map_entry_t entry);
133
134 static void vm_map_pmap_enter(
135 vm_map_t map,
136 vm_map_offset_t addr,
137 vm_map_offset_t end_addr,
138 vm_object_t object,
139 vm_object_offset_t offset,
140 vm_prot_t protection);
141
142 static void _vm_map_clip_end(
143 struct vm_map_header *map_header,
144 vm_map_entry_t entry,
145 vm_map_offset_t end);
146
147 static void _vm_map_clip_start(
148 struct vm_map_header *map_header,
149 vm_map_entry_t entry,
150 vm_map_offset_t start);
151
152 static void vm_map_entry_delete(
153 vm_map_t map,
154 vm_map_entry_t entry);
155
156 static kern_return_t vm_map_delete(
157 vm_map_t map,
158 vm_map_offset_t start,
159 vm_map_offset_t end,
160 int flags,
161 vm_map_t zap_map);
162
163 static kern_return_t vm_map_copy_overwrite_unaligned(
164 vm_map_t dst_map,
165 vm_map_entry_t entry,
166 vm_map_copy_t copy,
167 vm_map_address_t start,
168 boolean_t discard_on_success);
169
170 static kern_return_t vm_map_copy_overwrite_aligned(
171 vm_map_t dst_map,
172 vm_map_entry_t tmp_entry,
173 vm_map_copy_t copy,
174 vm_map_offset_t start,
175 pmap_t pmap);
176
177 static kern_return_t vm_map_copyin_kernel_buffer(
178 vm_map_t src_map,
179 vm_map_address_t src_addr,
180 vm_map_size_t len,
181 boolean_t src_destroy,
182 vm_map_copy_t *copy_result); /* OUT */
183
184 static kern_return_t vm_map_copyout_kernel_buffer(
185 vm_map_t map,
186 vm_map_address_t *addr, /* IN/OUT */
187 vm_map_copy_t copy,
188 boolean_t overwrite,
189 boolean_t consume_on_success);
190
191 static void vm_map_fork_share(
192 vm_map_t old_map,
193 vm_map_entry_t old_entry,
194 vm_map_t new_map);
195
196 static boolean_t vm_map_fork_copy(
197 vm_map_t old_map,
198 vm_map_entry_t *old_entry_p,
199 vm_map_t new_map);
200
201 void vm_map_region_top_walk(
202 vm_map_entry_t entry,
203 vm_region_top_info_t top);
204
205 void vm_map_region_walk(
206 vm_map_t map,
207 vm_map_offset_t va,
208 vm_map_entry_t entry,
209 vm_object_offset_t offset,
210 vm_object_size_t range,
211 vm_region_extended_info_t extended,
212 boolean_t look_for_pages,
213 mach_msg_type_number_t count);
214
215 static kern_return_t vm_map_wire_nested(
216 vm_map_t map,
217 vm_map_offset_t start,
218 vm_map_offset_t end,
219 vm_prot_t access_type,
220 boolean_t user_wire,
221 pmap_t map_pmap,
222 vm_map_offset_t pmap_addr,
223 ppnum_t *physpage_p);
224
225 static kern_return_t vm_map_unwire_nested(
226 vm_map_t map,
227 vm_map_offset_t start,
228 vm_map_offset_t end,
229 boolean_t user_wire,
230 pmap_t map_pmap,
231 vm_map_offset_t pmap_addr);
232
233 static kern_return_t vm_map_overwrite_submap_recurse(
234 vm_map_t dst_map,
235 vm_map_offset_t dst_addr,
236 vm_map_size_t dst_size);
237
238 static kern_return_t vm_map_copy_overwrite_nested(
239 vm_map_t dst_map,
240 vm_map_offset_t dst_addr,
241 vm_map_copy_t copy,
242 boolean_t interruptible,
243 pmap_t pmap,
244 boolean_t discard_on_success);
245
246 static kern_return_t vm_map_remap_extract(
247 vm_map_t map,
248 vm_map_offset_t addr,
249 vm_map_size_t size,
250 boolean_t copy,
251 struct vm_map_header *map_header,
252 vm_prot_t *cur_protection,
253 vm_prot_t *max_protection,
254 vm_inherit_t inheritance,
255 boolean_t pageable);
256
257 static kern_return_t vm_map_remap_range_allocate(
258 vm_map_t map,
259 vm_map_address_t *address,
260 vm_map_size_t size,
261 vm_map_offset_t mask,
262 int flags,
263 vm_map_entry_t *map_entry);
264
265 static void vm_map_region_look_for_page(
266 vm_map_t map,
267 vm_map_offset_t va,
268 vm_object_t object,
269 vm_object_offset_t offset,
270 int max_refcnt,
271 int depth,
272 vm_region_extended_info_t extended,
273 mach_msg_type_number_t count);
274
275 static int vm_map_region_count_obj_refs(
276 vm_map_entry_t entry,
277 vm_object_t object);
278
279
280 static kern_return_t vm_map_willneed(
281 vm_map_t map,
282 vm_map_offset_t start,
283 vm_map_offset_t end);
284
285 static kern_return_t vm_map_reuse_pages(
286 vm_map_t map,
287 vm_map_offset_t start,
288 vm_map_offset_t end);
289
290 static kern_return_t vm_map_reusable_pages(
291 vm_map_t map,
292 vm_map_offset_t start,
293 vm_map_offset_t end);
294
295 static kern_return_t vm_map_can_reuse(
296 vm_map_t map,
297 vm_map_offset_t start,
298 vm_map_offset_t end);
299
300
301 /*
302 * Macros to copy a vm_map_entry. We must be careful to correctly
303  * manage the wired page count.  vm_map_entry_copy() creates a new
304  * map entry referring to the same memory - the wired count in the new entry
305 * must be set to zero. vm_map_entry_copy_full() creates a new
306 * entry that is identical to the old entry. This preserves the
307 * wire count; it's used for map splitting and zone changing in
308 * vm_map_copyout.
309 */
310
311 #define vm_map_entry_copy(NEW,OLD) \
312 MACRO_BEGIN \
313 boolean_t _vmec_reserved = (NEW)->from_reserved_zone; \
314 *(NEW) = *(OLD); \
315 (NEW)->is_shared = FALSE; \
316 (NEW)->needs_wakeup = FALSE; \
317 (NEW)->in_transition = FALSE; \
318 (NEW)->wired_count = 0; \
319 (NEW)->user_wired_count = 0; \
320 (NEW)->permanent = FALSE; \
321 (NEW)->used_for_jit = FALSE; \
322 (NEW)->from_reserved_zone = _vmec_reserved; \
323 (NEW)->iokit_acct = FALSE; \
324 MACRO_END
325
326 #define vm_map_entry_copy_full(NEW,OLD) \
327 MACRO_BEGIN \
328 boolean_t _vmecf_reserved = (NEW)->from_reserved_zone; \
329 (*(NEW) = *(OLD)); \
330 (NEW)->from_reserved_zone = _vmecf_reserved; \
331 MACRO_END
332
333 /*
334 * Decide if we want to allow processes to execute from their data or stack areas.
335 * override_nx() returns true if we do. Data/stack execution can be enabled independently
336 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
337 * or allow_stack_exec to enable data execution for that type of data area for that particular
338 * ABI (or both by or'ing the flags together). These are initialized in the architecture
339 * specific pmap files since the default behavior varies according to architecture. The
340 * main reason it varies is because of the need to provide binary compatibility with old
341 * applications that were written before these restrictions came into being. In the old
342 * days, an app could execute anything it could read, but this has slowly been tightened
343 * up over time. The default behavior is:
344 *
345 * 32-bit PPC apps may execute from both stack and data areas
346  * 32-bit Intel apps may execute from data areas but not stack
347 * 64-bit PPC/Intel apps may not execute from either data or stack
348 *
349 * An application on any architecture may override these defaults by explicitly
350 * adding PROT_EXEC permission to the page in question with the mprotect(2)
351 * system call. This code here just determines what happens when an app tries to
352 * execute from a page that lacks execute permission.
353 *
354 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
355 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
356 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
357 * execution from data areas for a particular binary even if the arch normally permits it. As
358 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
359 * to support some complicated use cases, notably browsers with out-of-process plugins that
360 * are not all NX-safe.
361 */
362
363 extern int allow_data_exec, allow_stack_exec;
364
365 int
366 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
367 {
368 int current_abi;
369
370 /*
371 * Determine if the app is running in 32 or 64 bit mode.
372 */
373
374 if (vm_map_is_64bit(map))
375 current_abi = VM_ABI_64;
376 else
377 current_abi = VM_ABI_32;
378
379 /*
380 * Determine if we should allow the execution based on whether it's a
381 * stack or data area and the current architecture.
382 */
383
384 if (user_tag == VM_MEMORY_STACK)
385 return allow_stack_exec & current_abi;
386
387 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
388 }
389
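/*
 * Illustrative example of the flag arithmetic above: suppose the pmap
 * layer initialized allow_data_exec to VM_ABI_32 (the 32-bit Intel
 * default described above).  For a 32-bit process executing from a
 * non-stack page that lacks execute permission, override_nx() evaluates
 *
 *	(allow_data_exec & VM_ABI_32) && !map->map_disallow_data_exec
 *
 * which is normally non-zero, so the missing execute permission is
 * overridden; for a 64-bit process the corresponding test is
 * (VM_ABI_32 & VM_ABI_64) == 0, so the execution attempt is refused.
 */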
390
391 /*
392 * Virtual memory maps provide for the mapping, protection,
393 * and sharing of virtual memory objects. In addition,
394 * this module provides for an efficient virtual copy of
395 * memory from one map to another.
396 *
397 * Synchronization is required prior to most operations.
398 *
399 * Maps consist of an ordered doubly-linked list of simple
400 * entries; a single hint is used to speed up lookups.
401 *
402 * Sharing maps have been deleted from this version of Mach.
403 * All shared objects are now mapped directly into the respective
404 * maps. This requires a change in the copy on write strategy;
405 * the asymmetric (delayed) strategy is used for shared temporary
406 * objects instead of the symmetric (shadow) strategy. All maps
407 * are now "top level" maps (either task map, kernel map or submap
408 * of the kernel map).
409 *
410  *	Since portions of maps are specified by start/end addresses,
411 * which may not align with existing map entries, all
412 * routines merely "clip" entries to these start/end values.
413 * [That is, an entry is split into two, bordering at a
414 * start or end value.] Note that these clippings may not
415 * always be necessary (as the two resulting entries are then
416 * not changed); however, the clipping is done for convenience.
417 * No attempt is currently made to "glue back together" two
418 * abutting entries.
419 *
420 * The symmetric (shadow) copy strategy implements virtual copy
421 * by copying VM object references from one map to
422 * another, and then marking both regions as copy-on-write.
423 * It is important to note that only one writeable reference
424 * to a VM object region exists in any map when this strategy
425 * is used -- this means that shadow object creation can be
426  *	delayed until a write operation occurs.  The asymmetric (delayed)
427 * strategy allows multiple maps to have writeable references to
428 * the same region of a vm object, and hence cannot delay creating
429 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
430 * Copying of permanent objects is completely different; see
431 * vm_object_copy_strategically() in vm_object.c.
432 */
433
434 static zone_t vm_map_zone; /* zone for vm_map structures */
435 static zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
436 static zone_t vm_map_entry_reserved_zone; /* zone with reserve for non-blocking
437 * allocations */
438 static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
439
440
441 /*
442 * Placeholder object for submap operations. This object is dropped
443 * into the range by a call to vm_map_find, and removed when
444 * vm_map_submap creates the submap.
445 */
446
447 vm_object_t vm_submap_object;
448
449 static void *map_data;
450 static vm_size_t map_data_size;
451 static void *kentry_data;
452 static vm_size_t kentry_data_size;
453
454 #define NO_COALESCE_LIMIT ((1024 * 128) - 1)
455
456 /* Skip acquiring locks if we're in the midst of a kernel core dump */
457 unsigned int not_in_kdp = 1;
458
459 unsigned int vm_map_set_cache_attr_count = 0;
460
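/*
 * vm_map_set_cache_attr:
 *
 * Look up the map entry containing "va" and set the "set_cache_attr"
 * flag on its backing VM object, bumping vm_map_set_cache_attr_count.
 * Returns KERN_INVALID_ARGUMENT if "va" is not mapped, is mapped via a
 * submap, or has no VM object yet.
 */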
461 kern_return_t
462 vm_map_set_cache_attr(
463 vm_map_t map,
464 vm_map_offset_t va)
465 {
466 vm_map_entry_t map_entry;
467 vm_object_t object;
468 kern_return_t kr = KERN_SUCCESS;
469
470 vm_map_lock_read(map);
471
472 if (!vm_map_lookup_entry(map, va, &map_entry) ||
473 map_entry->is_sub_map) {
474 /*
475 * that memory is not properly mapped
476 */
477 kr = KERN_INVALID_ARGUMENT;
478 goto done;
479 }
480 object = map_entry->object.vm_object;
481
482 if (object == VM_OBJECT_NULL) {
483 /*
484 * there should be a VM object here at this point
485 */
486 kr = KERN_INVALID_ARGUMENT;
487 goto done;
488 }
489 vm_object_lock(object);
490 object->set_cache_attr = TRUE;
491 vm_object_unlock(object);
492
493 vm_map_set_cache_attr_count++;
494 done:
495 vm_map_unlock_read(map);
496
497 return kr;
498 }
499
500
501 #if CONFIG_CODE_DECRYPTION
502 /*
503 * vm_map_apple_protected:
504 * This remaps the requested part of the object with an object backed by
505 * the decrypting pager.
506 * crypt_info contains entry points and session data for the crypt module.
507 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
508 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
509 */
510 kern_return_t
511 vm_map_apple_protected(
512 vm_map_t map,
513 vm_map_offset_t start,
514 vm_map_offset_t end,
515 struct pager_crypt_info *crypt_info)
516 {
517 boolean_t map_locked;
518 kern_return_t kr;
519 vm_map_entry_t map_entry;
520 memory_object_t protected_mem_obj;
521 vm_object_t protected_object;
522 vm_map_offset_t map_addr;
523
524 vm_map_lock_read(map);
525 map_locked = TRUE;
526
527 /* lookup the protected VM object */
528 if (!vm_map_lookup_entry(map,
529 start,
530 &map_entry) ||
531 map_entry->vme_end < end ||
532 map_entry->is_sub_map ||
533 !(map_entry->protection & VM_PROT_EXECUTE)) {
534 /* that memory is not properly mapped */
535 kr = KERN_INVALID_ARGUMENT;
536 goto done;
537 }
538 protected_object = map_entry->object.vm_object;
539 if (protected_object == VM_OBJECT_NULL) {
540 /* there should be a VM object here at this point */
541 kr = KERN_INVALID_ARGUMENT;
542 goto done;
543 }
544
545 /* make sure protected object stays alive while map is unlocked */
546 vm_object_reference(protected_object);
547
548 vm_map_unlock_read(map);
549 map_locked = FALSE;
550
551 /*
552 * Lookup (and create if necessary) the protected memory object
553 * matching that VM object.
554 * If successful, this also grabs a reference on the memory object,
555 * to guarantee that it doesn't go away before we get a chance to map
556 * it.
557 */
558 protected_mem_obj = apple_protect_pager_setup(protected_object, crypt_info);
559
560 /* release extra ref on protected object */
561 vm_object_deallocate(protected_object);
562
563 if (protected_mem_obj == NULL) {
564 kr = KERN_FAILURE;
565 goto done;
566 }
567
568 /* map this memory object in place of the current one */
569 map_addr = start;
570 kr = vm_map_enter_mem_object(map,
571 &map_addr,
572 end - start,
573 (mach_vm_offset_t) 0,
574 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
575 (ipc_port_t) protected_mem_obj,
576 (map_entry->offset +
577 (start - map_entry->vme_start)),
578 TRUE,
579 map_entry->protection,
580 map_entry->max_protection,
581 map_entry->inheritance);
582 assert(map_addr == start);
583 /*
584 * Release the reference obtained by apple_protect_pager_setup().
585 * The mapping (if it succeeded) is now holding a reference on the
586 * memory object.
587 */
588 memory_object_deallocate(protected_mem_obj);
589
590 done:
591 if (map_locked) {
592 vm_map_unlock_read(map);
593 }
594 return kr;
595 }
596 #endif /* CONFIG_CODE_DECRYPTION */
597
598
599 lck_grp_t vm_map_lck_grp;
600 lck_grp_attr_t vm_map_lck_grp_attr;
601 lck_attr_t vm_map_lck_attr;
602 lck_attr_t vm_map_lck_rw_attr;
603
604
605 /*
606 * vm_map_init:
607 *
608 * Initialize the vm_map module. Must be called before
609 * any other vm_map routines.
610 *
611 * Map and entry structures are allocated from zones -- we must
612 * initialize those zones.
613 *
614 * There are three zones of interest:
615 *
616 * vm_map_zone: used to allocate maps.
617 * vm_map_entry_zone: used to allocate map entries.
618 * vm_map_entry_reserved_zone: fallback zone for kernel map entries
619 *
620 * The kernel allocates map entries from a special zone that is initially
621 * "crammed" with memory. It would be difficult (perhaps impossible) for
622  *	the kernel to allocate more memory to an entry zone when it became
623 * empty since the very act of allocating memory implies the creation
624 * of a new entry.
625 */
626 void
627 vm_map_init(
628 void)
629 {
630 vm_size_t entry_zone_alloc_size;
631 const char *mez_name = "VM map entries";
632
633 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
634 PAGE_SIZE, "maps");
635 zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
636 #if defined(__LP64__)
637 entry_zone_alloc_size = PAGE_SIZE * 5;
638 #else
639 entry_zone_alloc_size = PAGE_SIZE * 6;
640 #endif
641 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
642 1024*1024, entry_zone_alloc_size,
643 mez_name);
644 zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
645 zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
646 zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);
647
648 vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
649 kentry_data_size * 64, kentry_data_size,
650 "Reserved VM map entries");
651 zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);
652
653 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
654 16*1024, PAGE_SIZE, "VM map copies");
655 zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
656
657 /*
658 * Cram the map and kentry zones with initial data.
659 * Set reserved_zone non-collectible to aid zone_gc().
660 */
661 zone_change(vm_map_zone, Z_COLLECT, FALSE);
662
663 zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
664 zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
665 zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
666 zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
667 zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
668 zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
669 zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);
670
671 zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
672 zcram(vm_map_entry_reserved_zone, (vm_offset_t)kentry_data, kentry_data_size);
673
674 lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
675 lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
676 lck_attr_setdefault(&vm_map_lck_attr);
677
678 lck_attr_setdefault(&vm_map_lck_rw_attr);
679 lck_attr_cleardebug(&vm_map_lck_rw_attr);
680
681 #if CONFIG_FREEZE
682 default_freezer_init();
683 #endif /* CONFIG_FREEZE */
684 }
685
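/*
 * vm_map_steal_memory:
 *
 * Grab boot-time memory, via pmap_steal_memory(), for the initial map
 * structures and for the reserved kernel map entry pool; vm_map_init()
 * later crams this memory into the corresponding zones.
 */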
686 void
687 vm_map_steal_memory(
688 void)
689 {
690 uint32_t kentry_initial_pages;
691
692 map_data_size = round_page(10 * sizeof(struct _vm_map));
693 map_data = pmap_steal_memory(map_data_size);
694
695 /*
696 * kentry_initial_pages corresponds to the number of kernel map entries
697 * required during bootstrap until the asynchronous replenishment
698 * scheme is activated and/or entries are available from the general
699 * map entry pool.
700 */
701 #if defined(__LP64__)
702 kentry_initial_pages = 10;
703 #else
704 kentry_initial_pages = 6;
705 #endif
706
707 #if CONFIG_GZALLOC
708 /* If using the guard allocator, reserve more memory for the kernel
709 * reserved map entry pool.
710 */
711 if (gzalloc_enabled())
712 kentry_initial_pages *= 1024;
713 #endif
714
715 kentry_data_size = kentry_initial_pages * PAGE_SIZE;
716 kentry_data = pmap_steal_memory(kentry_data_size);
717 }
718
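/*
 * Enable the asynchronous replenishment scheme (see above) for the
 * reserved kernel map entry zone; the level requested here corresponds
 * to roughly six pages' worth of map entries.
 */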
719 void vm_kernel_reserved_entry_init(void) {
720 zone_prio_refill_configure(vm_map_entry_reserved_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_entry));
721 }
722
723 /*
724 * vm_map_create:
725 *
726 * Creates and returns a new empty VM map with
727 * the given physical map structure, and having
728 * the given lower and upper address bounds.
729 */
730 vm_map_t
731 vm_map_create(
732 pmap_t pmap,
733 vm_map_offset_t min,
734 vm_map_offset_t max,
735 boolean_t pageable)
736 {
737 static int color_seed = 0;
738 register vm_map_t result;
739
740 result = (vm_map_t) zalloc(vm_map_zone);
741 if (result == VM_MAP_NULL)
742 panic("vm_map_create");
743
744 vm_map_first_entry(result) = vm_map_to_entry(result);
745 vm_map_last_entry(result) = vm_map_to_entry(result);
746 result->hdr.nentries = 0;
747 result->hdr.entries_pageable = pageable;
748
749 vm_map_store_init( &(result->hdr) );
750
751 result->hdr.page_shift = PAGE_SHIFT;
752
753 result->size = 0;
754 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
755 result->user_wire_size = 0;
756 result->ref_count = 1;
757 #if TASK_SWAPPER
758 result->res_count = 1;
759 result->sw_state = MAP_SW_IN;
760 #endif /* TASK_SWAPPER */
761 result->pmap = pmap;
762 result->min_offset = min;
763 result->max_offset = max;
764 result->wiring_required = FALSE;
765 result->no_zero_fill = FALSE;
766 result->mapped_in_other_pmaps = FALSE;
767 result->wait_for_space = FALSE;
768 result->switch_protect = FALSE;
769 result->disable_vmentry_reuse = FALSE;
770 result->map_disallow_data_exec = FALSE;
771 result->highest_entry_end = 0;
772 result->first_free = vm_map_to_entry(result);
773 result->hint = vm_map_to_entry(result);
774 result->color_rr = (color_seed++) & vm_color_mask;
775 result->jit_entry_exists = FALSE;
776 #if CONFIG_FREEZE
777 result->default_freezer_handle = NULL;
778 #endif
779 vm_map_lock_init(result);
780 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
781
782 return(result);
783 }
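
/*
 * Illustrative use (not taken verbatim from any caller): creating a
 * pageable map covering the user address range for a freshly created
 * pmap "new_pmap":
 *
 *	vm_map_t new_map;
 *
 *	new_map = vm_map_create(new_pmap,
 *				(vm_map_offset_t) VM_MIN_ADDRESS,
 *				(vm_map_offset_t) VM_MAX_ADDRESS,
 *				TRUE);
 */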
784
785 /*
786 * vm_map_entry_create: [ internal use only ]
787 *
788 * Allocates a VM map entry for insertion in the
789 * given map (or map copy). No fields are filled.
790 */
791 #define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked)
792
793 #define vm_map_copy_entry_create(copy, map_locked) \
794 _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
795 unsigned reserved_zalloc_count, nonreserved_zalloc_count;
796
797 static vm_map_entry_t
798 _vm_map_entry_create(
799 struct vm_map_header *map_header, boolean_t __unused map_locked)
800 {
801 zone_t zone;
802 vm_map_entry_t entry;
803
804 zone = vm_map_entry_zone;
805
806 assert(map_header->entries_pageable ? !map_locked : TRUE);
807
808 if (map_header->entries_pageable) {
809 entry = (vm_map_entry_t) zalloc(zone);
810 }
811 else {
812 entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);
813
814 if (entry == VM_MAP_ENTRY_NULL) {
815 zone = vm_map_entry_reserved_zone;
816 entry = (vm_map_entry_t) zalloc(zone);
817 OSAddAtomic(1, &reserved_zalloc_count);
818 } else
819 OSAddAtomic(1, &nonreserved_zalloc_count);
820 }
821
822 if (entry == VM_MAP_ENTRY_NULL)
823 panic("vm_map_entry_create");
824 entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
825
826 vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
827 #if MAP_ENTRY_CREATION_DEBUG
828 entry->vme_creation_maphdr = map_header;
829 fastbacktrace(&entry->vme_creation_bt[0],
830 (sizeof(entry->vme_creation_bt)/sizeof(uintptr_t)));
831 #endif
832 return(entry);
833 }
834
835 /*
836 * vm_map_entry_dispose: [ internal use only ]
837 *
838 * Inverse of vm_map_entry_create.
839 *
840 * write map lock held so no need to
841 * do anything special to insure correctness
842 * of the stores
843 */
844 #define vm_map_entry_dispose(map, entry) \
845 _vm_map_entry_dispose(&(map)->hdr, (entry))
846
847 #define vm_map_copy_entry_dispose(copy, entry)			\
848 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
849
850 static void
851 _vm_map_entry_dispose(
852 register struct vm_map_header *map_header,
853 register vm_map_entry_t entry)
854 {
855 register zone_t zone;
856
857 if (map_header->entries_pageable || !(entry->from_reserved_zone))
858 zone = vm_map_entry_zone;
859 else
860 zone = vm_map_entry_reserved_zone;
861
862 if (!map_header->entries_pageable) {
863 if (zone == vm_map_entry_zone)
864 OSAddAtomic(-1, &nonreserved_zalloc_count);
865 else
866 OSAddAtomic(-1, &reserved_zalloc_count);
867 }
868
869 zfree(zone, entry);
870 }
871
872 #if MACH_ASSERT
873 static boolean_t first_free_check = FALSE;
874 boolean_t
875 first_free_is_valid(
876 vm_map_t map)
877 {
878 if (!first_free_check)
879 return TRUE;
880
881 return( first_free_is_valid_store( map ));
882 }
883 #endif /* MACH_ASSERT */
884
885
886 #define vm_map_copy_entry_link(copy, after_where, entry) \
887 _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
888
889 #define vm_map_copy_entry_unlink(copy, entry) \
890 _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
891
892 #if MACH_ASSERT && TASK_SWAPPER
893 /*
894 * vm_map_res_reference:
895 *
896 * Adds another valid residence count to the given map.
897 *
898 * Map is locked so this function can be called from
899 * vm_map_swapin.
900 *
901 */
902 void vm_map_res_reference(register vm_map_t map)
903 {
904 /* assert map is locked */
905 assert(map->res_count >= 0);
906 assert(map->ref_count >= map->res_count);
907 if (map->res_count == 0) {
908 lck_mtx_unlock(&map->s_lock);
909 vm_map_lock(map);
910 vm_map_swapin(map);
911 lck_mtx_lock(&map->s_lock);
912 ++map->res_count;
913 vm_map_unlock(map);
914 } else
915 ++map->res_count;
916 }
917
918 /*
919 * vm_map_reference_swap:
920 *
921 * Adds valid reference and residence counts to the given map.
922 *
923 * The map may not be in memory (i.e. zero residence count).
924 *
925 */
926 void vm_map_reference_swap(register vm_map_t map)
927 {
928 assert(map != VM_MAP_NULL);
929 lck_mtx_lock(&map->s_lock);
930 assert(map->res_count >= 0);
931 assert(map->ref_count >= map->res_count);
932 map->ref_count++;
933 vm_map_res_reference(map);
934 lck_mtx_unlock(&map->s_lock);
935 }
936
937 /*
938 * vm_map_res_deallocate:
939 *
940 * Decrement residence count on a map; possibly causing swapout.
941 *
942 * The map must be in memory (i.e. non-zero residence count).
943 *
944 * The map is locked, so this function is callable from vm_map_deallocate.
945 *
946 */
947 void vm_map_res_deallocate(register vm_map_t map)
948 {
949 assert(map->res_count > 0);
950 if (--map->res_count == 0) {
951 lck_mtx_unlock(&map->s_lock);
952 vm_map_lock(map);
953 vm_map_swapout(map);
954 vm_map_unlock(map);
955 lck_mtx_lock(&map->s_lock);
956 }
957 assert(map->ref_count >= map->res_count);
958 }
959 #endif /* MACH_ASSERT && TASK_SWAPPER */
960
961 /*
962 * vm_map_destroy:
963 *
964 * Actually destroy a map.
965 */
966 void
967 vm_map_destroy(
968 vm_map_t map,
969 int flags)
970 {
971 vm_map_lock(map);
972
973 /* clean up regular map entries */
974 (void) vm_map_delete(map, map->min_offset, map->max_offset,
975 flags, VM_MAP_NULL);
976 /* clean up leftover special mappings (commpage, etc...) */
977 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
978 flags, VM_MAP_NULL);
979
980 #if CONFIG_FREEZE
981 if (map->default_freezer_handle) {
982 default_freezer_handle_deallocate(map->default_freezer_handle);
983 map->default_freezer_handle = NULL;
984 }
985 #endif
986 vm_map_unlock(map);
987
988 assert(map->hdr.nentries == 0);
989
990 if(map->pmap)
991 pmap_destroy(map->pmap);
992
993 zfree(vm_map_zone, map);
994 }
995
996 #if TASK_SWAPPER
997 /*
998 * vm_map_swapin/vm_map_swapout
999 *
1000 * Swap a map in and out, either referencing or releasing its resources.
1001  * These functions are for internal use only; however, they must be exported
1002 * because they may be called from macros, which are exported.
1003 *
1004 * In the case of swapout, there could be races on the residence count,
1005 * so if the residence count is up, we return, assuming that a
1006 * vm_map_deallocate() call in the near future will bring us back.
1007 *
1008 * Locking:
1009 * -- We use the map write lock for synchronization among races.
1010 * -- The map write lock, and not the simple s_lock, protects the
1011 * swap state of the map.
1012 * -- If a map entry is a share map, then we hold both locks, in
1013 * hierarchical order.
1014 *
1015 * Synchronization Notes:
1016 * 1) If a vm_map_swapin() call happens while swapout in progress, it
1017 * will block on the map lock and proceed when swapout is through.
1018 * 2) A vm_map_reference() call at this time is illegal, and will
1019 * cause a panic. vm_map_reference() is only allowed on resident
1020 * maps, since it refuses to block.
1021 * 3) A vm_map_swapin() call during a swapin will block, and
1022  *	proceed when the first swapin is done, turning into a nop.
1023 * This is the reason the res_count is not incremented until
1024 * after the swapin is complete.
1025 * 4) There is a timing hole after the checks of the res_count, before
1026 * the map lock is taken, during which a swapin may get the lock
1027 * before a swapout about to happen. If this happens, the swapin
1028 * will detect the state and increment the reference count, causing
1029 * the swapout to be a nop, thereby delaying it until a later
1030 * vm_map_deallocate. If the swapout gets the lock first, then
1031 * the swapin will simply block until the swapout is done, and
1032 * then proceed.
1033 *
1034 * Because vm_map_swapin() is potentially an expensive operation, it
1035 * should be used with caution.
1036 *
1037 * Invariants:
1038 * 1) A map with a residence count of zero is either swapped, or
1039 * being swapped.
1040 * 2) A map with a non-zero residence count is either resident,
1041 * or being swapped in.
1042 */
1043
1044 int vm_map_swap_enable = 1;
1045
1046 void vm_map_swapin (vm_map_t map)
1047 {
1048 register vm_map_entry_t entry;
1049
1050 if (!vm_map_swap_enable) /* debug */
1051 return;
1052
1053 /*
1054 * Map is locked
1055 * First deal with various races.
1056 */
1057 if (map->sw_state == MAP_SW_IN)
1058 /*
1059 * we raced with swapout and won. Returning will incr.
1060 * the res_count, turning the swapout into a nop.
1061 */
1062 return;
1063
1064 /*
1065 * The residence count must be zero. If we raced with another
1066 * swapin, the state would have been IN; if we raced with a
1067 * swapout (after another competing swapin), we must have lost
1068 * the race to get here (see above comment), in which case
1069 * res_count is still 0.
1070 */
1071 assert(map->res_count == 0);
1072
1073 /*
1074 * There are no intermediate states of a map going out or
1075 * coming in, since the map is locked during the transition.
1076 */
1077 assert(map->sw_state == MAP_SW_OUT);
1078
1079 /*
1080 * We now operate upon each map entry. If the entry is a sub-
1081 * or share-map, we call vm_map_res_reference upon it.
1082 * If the entry is an object, we call vm_object_res_reference
1083 * (this may iterate through the shadow chain).
1084 * Note that we hold the map locked the entire time,
1085 * even if we get back here via a recursive call in
1086 * vm_map_res_reference.
1087 */
1088 entry = vm_map_first_entry(map);
1089
1090 while (entry != vm_map_to_entry(map)) {
1091 if (entry->object.vm_object != VM_OBJECT_NULL) {
1092 if (entry->is_sub_map) {
1093 vm_map_t lmap = entry->object.sub_map;
1094 lck_mtx_lock(&lmap->s_lock);
1095 vm_map_res_reference(lmap);
1096 lck_mtx_unlock(&lmap->s_lock);
1097 } else {
1098 vm_object_t object = entry->object.vm_object;
1099 vm_object_lock(object);
1100 /*
1101 * This call may iterate through the
1102 * shadow chain.
1103 */
1104 vm_object_res_reference(object);
1105 vm_object_unlock(object);
1106 }
1107 }
1108 entry = entry->vme_next;
1109 }
1110 assert(map->sw_state == MAP_SW_OUT);
1111 map->sw_state = MAP_SW_IN;
1112 }
1113
1114 void vm_map_swapout(vm_map_t map)
1115 {
1116 register vm_map_entry_t entry;
1117
1118 /*
1119 * Map is locked
1120 * First deal with various races.
1121 * If we raced with a swapin and lost, the residence count
1122 * will have been incremented to 1, and we simply return.
1123 */
1124 lck_mtx_lock(&map->s_lock);
1125 if (map->res_count != 0) {
1126 lck_mtx_unlock(&map->s_lock);
1127 return;
1128 }
1129 lck_mtx_unlock(&map->s_lock);
1130
1131 /*
1132 * There are no intermediate states of a map going out or
1133 * coming in, since the map is locked during the transition.
1134 */
1135 assert(map->sw_state == MAP_SW_IN);
1136
1137 if (!vm_map_swap_enable)
1138 return;
1139
1140 /*
1141 * We now operate upon each map entry. If the entry is a sub-
1142 * or share-map, we call vm_map_res_deallocate upon it.
1143 * If the entry is an object, we call vm_object_res_deallocate
1144 * (this may iterate through the shadow chain).
1145 * Note that we hold the map locked the entire time,
1146 * even if we get back here via a recursive call in
1147 * vm_map_res_deallocate.
1148 */
1149 entry = vm_map_first_entry(map);
1150
1151 while (entry != vm_map_to_entry(map)) {
1152 if (entry->object.vm_object != VM_OBJECT_NULL) {
1153 if (entry->is_sub_map) {
1154 vm_map_t lmap = entry->object.sub_map;
1155 lck_mtx_lock(&lmap->s_lock);
1156 vm_map_res_deallocate(lmap);
1157 lck_mtx_unlock(&lmap->s_lock);
1158 } else {
1159 vm_object_t object = entry->object.vm_object;
1160 vm_object_lock(object);
1161 /*
1162 * This call may take a long time,
1163 * since it could actively push
1164 * out pages (if we implement it
1165 * that way).
1166 */
1167 vm_object_res_deallocate(object);
1168 vm_object_unlock(object);
1169 }
1170 }
1171 entry = entry->vme_next;
1172 }
1173 assert(map->sw_state == MAP_SW_IN);
1174 map->sw_state = MAP_SW_OUT;
1175 }
1176
1177 #endif /* TASK_SWAPPER */
1178
1179 /*
1180 * vm_map_lookup_entry: [ internal use only ]
1181 *
1182 * Calls into the vm map store layer to find the map
1183 * entry containing (or immediately preceding) the
1184 * specified address in the given map; the entry is returned
1185 * in the "entry" parameter. The boolean
1186 * result indicates whether the address is
1187 * actually contained in the map.
1188 */
1189 boolean_t
1190 vm_map_lookup_entry(
1191 register vm_map_t map,
1192 register vm_map_offset_t address,
1193 vm_map_entry_t *entry) /* OUT */
1194 {
1195 return ( vm_map_store_lookup_entry( map, address, entry ));
1196 }
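
/*
 * Typical usage pattern (see vm_map_set_cache_attr() above, for
 * example): hold at least the read lock across the lookup, since the
 * returned entry is only stable while the map stays locked:
 *
 *	vm_map_lock_read(map);
 *	if (vm_map_lookup_entry(map, addr, &entry)) {
 *		... "entry" contains "addr" ...
 *	} else {
 *		... "entry" immediately precedes the hole containing "addr" ...
 *	}
 *	vm_map_unlock_read(map);
 */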
1197
1198 /*
1199 * Routine: vm_map_find_space
1200 * Purpose:
1201 * Allocate a range in the specified virtual address map,
1202 * returning the entry allocated for that range.
1203 * Used by kmem_alloc, etc.
1204 *
1205  *	The map must NOT be locked. It will be returned locked
1206 * on KERN_SUCCESS, unlocked on failure.
1207 *
1208 * If an entry is allocated, the object/offset fields
1209 * are initialized to zero.
1210 */
1211 kern_return_t
1212 vm_map_find_space(
1213 register vm_map_t map,
1214 vm_map_offset_t *address, /* OUT */
1215 vm_map_size_t size,
1216 vm_map_offset_t mask,
1217 int flags,
1218 vm_map_entry_t *o_entry) /* OUT */
1219 {
1220 register vm_map_entry_t entry, new_entry;
1221 register vm_map_offset_t start;
1222 register vm_map_offset_t end;
1223
1224 if (size == 0) {
1225 *address = 0;
1226 return KERN_INVALID_ARGUMENT;
1227 }
1228
1229 if (flags & VM_FLAGS_GUARD_AFTER) {
1230 /* account for the back guard page in the size */
1231 size += VM_MAP_PAGE_SIZE(map);
1232 }
1233
1234 new_entry = vm_map_entry_create(map, FALSE);
1235
1236 /*
1237 * Look for the first possible address; if there's already
1238 * something at this address, we have to start after it.
1239 */
1240
1241 vm_map_lock(map);
1242
1243 if( map->disable_vmentry_reuse == TRUE) {
1244 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1245 } else {
1246 assert(first_free_is_valid(map));
1247 if ((entry = map->first_free) == vm_map_to_entry(map))
1248 start = map->min_offset;
1249 else
1250 start = entry->vme_end;
1251 }
1252
1253 /*
1254 * In any case, the "entry" always precedes
1255 * the proposed new region throughout the loop:
1256 */
1257
1258 while (TRUE) {
1259 register vm_map_entry_t next;
1260
1261 /*
1262 * Find the end of the proposed new region.
1263 * Be sure we didn't go beyond the end, or
1264 * wrap around the address.
1265 */
1266
1267 if (flags & VM_FLAGS_GUARD_BEFORE) {
1268 /* reserve space for the front guard page */
1269 start += VM_MAP_PAGE_SIZE(map);
1270 }
1271 end = ((start + mask) & ~mask);
1272
1273 if (end < start) {
1274 vm_map_entry_dispose(map, new_entry);
1275 vm_map_unlock(map);
1276 return(KERN_NO_SPACE);
1277 }
1278 start = end;
1279 end += size;
1280
1281 if ((end > map->max_offset) || (end < start)) {
1282 vm_map_entry_dispose(map, new_entry);
1283 vm_map_unlock(map);
1284 return(KERN_NO_SPACE);
1285 }
1286
1287 /*
1288 * If there are no more entries, we must win.
1289 */
1290
1291 next = entry->vme_next;
1292 if (next == vm_map_to_entry(map))
1293 break;
1294
1295 /*
1296 * If there is another entry, it must be
1297 * after the end of the potential new region.
1298 */
1299
1300 if (next->vme_start >= end)
1301 break;
1302
1303 /*
1304 * Didn't fit -- move to the next entry.
1305 */
1306
1307 entry = next;
1308 start = entry->vme_end;
1309 }
1310
1311 /*
1312 * At this point,
1313 * "start" and "end" should define the endpoints of the
1314 * available new range, and
1315 * "entry" should refer to the region before the new
1316 * range, and
1317 *
1318 * the map should be locked.
1319 */
1320
1321 if (flags & VM_FLAGS_GUARD_BEFORE) {
1322 /* go back for the front guard page */
1323 start -= VM_MAP_PAGE_SIZE(map);
1324 }
1325 *address = start;
1326
1327 assert(start < end);
1328 new_entry->vme_start = start;
1329 new_entry->vme_end = end;
1330 assert(page_aligned(new_entry->vme_start));
1331 assert(page_aligned(new_entry->vme_end));
1332 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
1333 VM_MAP_PAGE_MASK(map)));
1334 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
1335 VM_MAP_PAGE_MASK(map)));
1336
1337 new_entry->is_shared = FALSE;
1338 new_entry->is_sub_map = FALSE;
1339 new_entry->use_pmap = TRUE;
1340 new_entry->object.vm_object = VM_OBJECT_NULL;
1341 new_entry->offset = (vm_object_offset_t) 0;
1342
1343 new_entry->needs_copy = FALSE;
1344
1345 new_entry->inheritance = VM_INHERIT_DEFAULT;
1346 new_entry->protection = VM_PROT_DEFAULT;
1347 new_entry->max_protection = VM_PROT_ALL;
1348 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1349 new_entry->wired_count = 0;
1350 new_entry->user_wired_count = 0;
1351
1352 new_entry->in_transition = FALSE;
1353 new_entry->needs_wakeup = FALSE;
1354 new_entry->no_cache = FALSE;
1355 new_entry->permanent = FALSE;
1356 new_entry->superpage_size = FALSE;
1357 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
1358 new_entry->map_aligned = TRUE;
1359 } else {
1360 new_entry->map_aligned = FALSE;
1361 }
1362
1363 new_entry->used_for_jit = 0;
1364
1365 new_entry->alias = 0;
1366 new_entry->zero_wired_pages = FALSE;
1367 new_entry->iokit_acct = FALSE;
1368
1369 VM_GET_FLAGS_ALIAS(flags, new_entry->alias);
1370
1371 /*
1372 * Insert the new entry into the list
1373 */
1374
1375 vm_map_store_entry_link(map, entry, new_entry);
1376
1377 map->size += size;
1378
1379 /*
1380 * Update the lookup hint
1381 */
1382 SAVE_HINT_MAP_WRITE(map, new_entry);
1383
1384 *o_entry = new_entry;
1385 return(KERN_SUCCESS);
1386 }
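
/*
 * Sketch of the usual caller pattern (cf. kmem_alloc and friends),
 * assuming "object" is a VM object the caller already holds: since the
 * map is returned locked on success, the caller installs the object in
 * the new entry and only then drops the lock:
 *
 *	kr = vm_map_find_space(map, &addr, size, 0, 0, &entry);
 *	if (kr == KERN_SUCCESS) {
 *		entry->object.vm_object = object;
 *		entry->offset = (vm_object_offset_t) 0;
 *		vm_map_unlock(map);
 *	}
 */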
1387
1388 int vm_map_pmap_enter_print = FALSE;
1389 int vm_map_pmap_enter_enable = FALSE;
1390
1391 /*
1392 * Routine: vm_map_pmap_enter [internal only]
1393 *
1394 * Description:
1395 * Force pages from the specified object to be entered into
1396 * the pmap at the specified address if they are present.
1397  *	As soon as a page is not found in the object, the scan ends.
1398 *
1399 * Returns:
1400 * Nothing.
1401 *
1402 * In/out conditions:
1403 * The source map should not be locked on entry.
1404 */
1405 __unused static void
1406 vm_map_pmap_enter(
1407 vm_map_t map,
1408 register vm_map_offset_t addr,
1409 register vm_map_offset_t end_addr,
1410 register vm_object_t object,
1411 vm_object_offset_t offset,
1412 vm_prot_t protection)
1413 {
1414 int type_of_fault;
1415 kern_return_t kr;
1416
1417 if(map->pmap == 0)
1418 return;
1419
1420 while (addr < end_addr) {
1421 register vm_page_t m;
1422
1423
1424 /*
1425 * TODO:
1426 * From vm_map_enter(), we come into this function without the map
1427 * lock held or the object lock held.
1428 * We haven't taken a reference on the object either.
1429 * We should do a proper lookup on the map to make sure
1430 * that things are sane before we go locking objects that
1431 * could have been deallocated from under us.
1432 */
1433
1434 vm_object_lock(object);
1435
1436 m = vm_page_lookup(object, offset);
1437 /*
1438 * ENCRYPTED SWAP:
1439 * The user should never see encrypted data, so do not
1440 * enter an encrypted page in the page table.
1441 */
1442 if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
1443 m->fictitious ||
1444 (m->unusual && ( m->error || m->restart || m->absent))) {
1445 vm_object_unlock(object);
1446 return;
1447 }
1448
1449 if (vm_map_pmap_enter_print) {
1450 printf("vm_map_pmap_enter:");
1451 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1452 map, (unsigned long long)addr, object, (unsigned long long)offset);
1453 }
1454 type_of_fault = DBG_CACHE_HIT_FAULT;
1455 kr = vm_fault_enter(m, map->pmap, addr, protection, protection,
1456 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
1457 0, /* XXX need user tag / alias? */
1458 0, /* alternate accounting? */
1459 NULL,
1460 &type_of_fault);
1461
1462 vm_object_unlock(object);
1463
1464 offset += PAGE_SIZE_64;
1465 addr += PAGE_SIZE;
1466 }
1467 }
1468
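/*
 * vm_map_pmap_is_empty:
 *
 * Sanity check: returns TRUE if no physical page is mapped in the map's
 * pmap anywhere in [start, end).  Without a machine-specific
 * pmap_is_empty(), the range is walked one page at a time with
 * pmap_find_phys() and the first translation found is reported.
 */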
1469 boolean_t vm_map_pmap_is_empty(
1470 vm_map_t map,
1471 vm_map_offset_t start,
1472 vm_map_offset_t end);
1473 boolean_t vm_map_pmap_is_empty(
1474 vm_map_t map,
1475 vm_map_offset_t start,
1476 vm_map_offset_t end)
1477 {
1478 #ifdef MACHINE_PMAP_IS_EMPTY
1479 return pmap_is_empty(map->pmap, start, end);
1480 #else /* MACHINE_PMAP_IS_EMPTY */
1481 vm_map_offset_t offset;
1482 ppnum_t phys_page;
1483
1484 if (map->pmap == NULL) {
1485 return TRUE;
1486 }
1487
1488 for (offset = start;
1489 offset < end;
1490 offset += PAGE_SIZE) {
1491 phys_page = pmap_find_phys(map->pmap, offset);
1492 if (phys_page) {
1493 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1494 "page %d at 0x%llx\n",
1495 map, (long long)start, (long long)end,
1496 phys_page, (long long)offset);
1497 return FALSE;
1498 }
1499 }
1500 return TRUE;
1501 #endif /* MACHINE_PMAP_IS_EMPTY */
1502 }
1503
1504 #define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
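/*
 * vm_map_random_address_for_size:
 *
 * Pick a page-aligned address at random within the map such that a hole
 * of at least "size" bytes starts there, giving up with KERN_NO_SPACE
 * after MAX_TRIES_TO_GET_RANDOM_ADDRESS attempts.  Called with the map
 * locked (see the VM_FLAGS_MAP_JIT path in vm_map_enter() below).
 */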
1505 kern_return_t
1506 vm_map_random_address_for_size(
1507 vm_map_t map,
1508 vm_map_offset_t *address,
1509 vm_map_size_t size)
1510 {
1511 kern_return_t kr = KERN_SUCCESS;
1512 int tries = 0;
1513 vm_map_offset_t random_addr = 0;
1514 vm_map_offset_t hole_end;
1515
1516 vm_map_entry_t next_entry = VM_MAP_ENTRY_NULL;
1517 vm_map_entry_t prev_entry = VM_MAP_ENTRY_NULL;
1518 vm_map_size_t vm_hole_size = 0;
1519 vm_map_size_t addr_space_size;
1520
1521 addr_space_size = vm_map_max(map) - vm_map_min(map);
1522
1523 assert(page_aligned(size));
1524
1525 while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1526 random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
1527 random_addr = vm_map_trunc_page(
1528 vm_map_min(map) +(random_addr % addr_space_size),
1529 VM_MAP_PAGE_MASK(map));
1530
1531 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
1532 if (prev_entry == vm_map_to_entry(map)) {
1533 next_entry = vm_map_first_entry(map);
1534 } else {
1535 next_entry = prev_entry->vme_next;
1536 }
1537 if (next_entry == vm_map_to_entry(map)) {
1538 hole_end = vm_map_max(map);
1539 } else {
1540 hole_end = next_entry->vme_start;
1541 }
1542 vm_hole_size = hole_end - random_addr;
1543 if (vm_hole_size >= size) {
1544 *address = random_addr;
1545 break;
1546 }
1547 }
1548 tries++;
1549 }
1550
1551 if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1552 kr = KERN_NO_SPACE;
1553 }
1554 return kr;
1555 }
1556
1557 /*
1558 * Routine: vm_map_enter
1559 *
1560 * Description:
1561 * Allocate a range in the specified virtual address map.
1562 * The resulting range will refer to memory defined by
1563 * the given memory object and offset into that object.
1564 *
1565 * Arguments are as defined in the vm_map call.
1566 */
1567 int _map_enter_debug = 0;
1568 static unsigned int vm_map_enter_restore_successes = 0;
1569 static unsigned int vm_map_enter_restore_failures = 0;
1570 kern_return_t
1571 vm_map_enter(
1572 vm_map_t map,
1573 vm_map_offset_t *address, /* IN/OUT */
1574 vm_map_size_t size,
1575 vm_map_offset_t mask,
1576 int flags,
1577 vm_object_t object,
1578 vm_object_offset_t offset,
1579 boolean_t needs_copy,
1580 vm_prot_t cur_protection,
1581 vm_prot_t max_protection,
1582 vm_inherit_t inheritance)
1583 {
1584 vm_map_entry_t entry, new_entry;
1585 vm_map_offset_t start, tmp_start, tmp_offset;
1586 vm_map_offset_t end, tmp_end;
1587 vm_map_offset_t tmp2_start, tmp2_end;
1588 vm_map_offset_t step;
1589 kern_return_t result = KERN_SUCCESS;
1590 vm_map_t zap_old_map = VM_MAP_NULL;
1591 vm_map_t zap_new_map = VM_MAP_NULL;
1592 boolean_t map_locked = FALSE;
1593 boolean_t pmap_empty = TRUE;
1594 boolean_t new_mapping_established = FALSE;
1595 boolean_t keep_map_locked = ((flags & VM_FLAGS_KEEP_MAP_LOCKED) != 0);
1596 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
1597 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
1598 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
1599 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
1600 boolean_t is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
1601 boolean_t permanent = ((flags & VM_FLAGS_PERMANENT) != 0);
1602 boolean_t entry_for_jit = ((flags & VM_FLAGS_MAP_JIT) != 0);
1603 boolean_t iokit_acct = ((flags & VM_FLAGS_IOKIT_ACCT) != 0);
1604 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
1605 char alias;
1606 vm_map_offset_t effective_min_offset, effective_max_offset;
1607 kern_return_t kr;
1608 boolean_t clear_map_aligned = FALSE;
1609
1610 if (superpage_size) {
1611 switch (superpage_size) {
1612 /*
1613 * Note that the current implementation only supports
1614 * a single size for superpages, SUPERPAGE_SIZE, per
1615  * architecture. As soon as more sizes are to be
1616  * supported, SUPERPAGE_SIZE has to be replaced
1617 * with a lookup of the size depending on superpage_size.
1618 */
1619 #ifdef __x86_64__
1620 case SUPERPAGE_SIZE_ANY:
1621 /* handle it like 2 MB and round up to page size */
1622 size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
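			/* fall through: treated the same as SUPERPAGE_SIZE_2MB */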
1623 case SUPERPAGE_SIZE_2MB:
1624 break;
1625 #endif
1626 default:
1627 return KERN_INVALID_ARGUMENT;
1628 }
1629 mask = SUPERPAGE_SIZE-1;
1630 if (size & (SUPERPAGE_SIZE-1))
1631 return KERN_INVALID_ARGUMENT;
1632 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
1633 }
1634
1635
1636
1637 if (is_submap) {
1638 if (purgable) {
1639 /* submaps can not be purgeable */
1640 return KERN_INVALID_ARGUMENT;
1641 }
1642 if (object == VM_OBJECT_NULL) {
1643 /* submaps can not be created lazily */
1644 return KERN_INVALID_ARGUMENT;
1645 }
1646 }
1647 if (flags & VM_FLAGS_ALREADY) {
1648 /*
1649 * VM_FLAGS_ALREADY says that it's OK if the same mapping
1650  * is already present.  For it to be meaningful, the requested
1651  * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
1652  * we shouldn't try to remove what was mapped there first
1653 * (!VM_FLAGS_OVERWRITE).
1654 */
1655 if ((flags & VM_FLAGS_ANYWHERE) ||
1656 (flags & VM_FLAGS_OVERWRITE)) {
1657 return KERN_INVALID_ARGUMENT;
1658 }
1659 }
1660
1661 effective_min_offset = map->min_offset;
1662
1663 if (flags & VM_FLAGS_BEYOND_MAX) {
1664 /*
1665 * Allow an insertion beyond the map's max offset.
1666 */
1667 if (vm_map_is_64bit(map))
1668 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
1669 else
1670 effective_max_offset = 0x00000000FFFFF000ULL;
1671 } else {
1672 effective_max_offset = map->max_offset;
1673 }
1674
1675 if (size == 0 ||
1676 (offset & PAGE_MASK_64) != 0) {
1677 *address = 0;
1678 return KERN_INVALID_ARGUMENT;
1679 }
1680
1681 VM_GET_FLAGS_ALIAS(flags, alias);
1682
1683 #define RETURN(value) { result = value; goto BailOut; }
1684
1685 assert(page_aligned(*address));
1686 assert(page_aligned(size));
1687
1688 if (!VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
1689 /*
1690 * In most cases, the caller rounds the size up to the
1691 * map's page size.
1692 * If we get a size that is explicitly not map-aligned here,
1693 * we'll have to respect the caller's wish and mark the
1694 * mapping as "not map-aligned" to avoid tripping the
1695 * map alignment checks later.
1696 */
1697 clear_map_aligned = TRUE;
1698 }
1699 if (!anywhere &&
1700 !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
1701 /*
1702 * We've been asked to map at a fixed address and that
1703 * address is not aligned to the map's specific alignment.
1704 * The caller should know what it's doing (i.e. most likely
1705 * mapping some fragmented copy map, transferring memory from
1706 * a VM map with a different alignment), so clear map_aligned
1707 * for this new VM map entry and proceed.
1708 */
1709 clear_map_aligned = TRUE;
1710 }
1711
1712 /*
1713 * Only zero-fill objects are allowed to be purgable.
1714 * LP64todo - limit purgable objects to 32-bits for now
1715 */
1716 if (purgable &&
1717 (offset != 0 ||
1718 (object != VM_OBJECT_NULL &&
1719 (object->vo_size != size ||
1720 object->purgable == VM_PURGABLE_DENY))
1721 || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
1722 return KERN_INVALID_ARGUMENT;
1723
1724 if (!anywhere && overwrite) {
1725 /*
1726 * Create a temporary VM map to hold the old mappings in the
1727 * affected area while we create the new one.
1728 * This avoids releasing the VM map lock in
1729 * vm_map_entry_delete() and allows atomicity
1730 * when we want to replace some mappings with a new one.
1731 * It also allows us to restore the old VM mappings if the
1732 * new mapping fails.
1733 */
1734 zap_old_map = vm_map_create(PMAP_NULL,
1735 *address,
1736 *address + size,
1737 map->hdr.entries_pageable);
1738 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
1739 }
1740
1741 StartAgain: ;
1742
1743 start = *address;
1744
1745 if (anywhere) {
1746 vm_map_lock(map);
1747 map_locked = TRUE;
1748
1749 if (entry_for_jit) {
1750 if (map->jit_entry_exists) {
1751 result = KERN_INVALID_ARGUMENT;
1752 goto BailOut;
1753 }
1754 /*
1755 * Get a random start address.
1756 */
1757 result = vm_map_random_address_for_size(map, address, size);
1758 if (result != KERN_SUCCESS) {
1759 goto BailOut;
1760 }
1761 start = *address;
1762 }
1763
1764
1765 /*
1766 * Calculate the first possible address.
1767 */
1768
1769 if (start < effective_min_offset)
1770 start = effective_min_offset;
1771 if (start > effective_max_offset)
1772 RETURN(KERN_NO_SPACE);
1773
1774 /*
1775 * Look for the first possible address;
1776 * if there's already something at this
1777 * address, we have to start after it.
1778 */
1779
1780 if( map->disable_vmentry_reuse == TRUE) {
1781 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1782 } else {
1783 assert(first_free_is_valid(map));
1784
1785 entry = map->first_free;
1786
1787 if (entry == vm_map_to_entry(map)) {
1788 entry = NULL;
1789 } else {
1790 if (entry->vme_next == vm_map_to_entry(map)){
1791 /*
1792 * Hole at the end of the map.
1793 */
1794 entry = NULL;
1795 } else {
1796 if (start < (entry->vme_next)->vme_start ) {
1797 start = entry->vme_end;
1798 start = vm_map_round_page(start,
1799 VM_MAP_PAGE_MASK(map));
1800 } else {
1801 /*
1802 * Need to do a lookup.
1803 */
1804 entry = NULL;
1805 }
1806 }
1807 }
1808
1809 if (entry == NULL) {
1810 vm_map_entry_t tmp_entry;
1811 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
1812 assert(!entry_for_jit);
1813 start = tmp_entry->vme_end;
1814 start = vm_map_round_page(start,
1815 VM_MAP_PAGE_MASK(map));
1816 }
1817 entry = tmp_entry;
1818 }
1819 }
1820
1821 /*
1822 * In any case, the "entry" always precedes
1823 * the proposed new region throughout the
1824 * loop:
1825 */
1826
1827 while (TRUE) {
1828 register vm_map_entry_t next;
1829
1830 /*
1831 * Find the end of the proposed new region.
1832 * Be sure we didn't go beyond the end, or
1833 * wrap around the address.
1834 */
1835
1836 end = ((start + mask) & ~mask);
1837 end = vm_map_round_page(end,
1838 VM_MAP_PAGE_MASK(map));
1839 if (end < start)
1840 RETURN(KERN_NO_SPACE);
1841 start = end;
1842 assert(VM_MAP_PAGE_ALIGNED(start,
1843 VM_MAP_PAGE_MASK(map)));
1844 end += size;
1845
1846 if ((end > effective_max_offset) || (end < start)) {
1847 if (map->wait_for_space) {
1848 assert(!keep_map_locked);
1849 if (size <= (effective_max_offset -
1850 effective_min_offset)) {
1851 assert_wait((event_t)map,
1852 THREAD_ABORTSAFE);
1853 vm_map_unlock(map);
1854 map_locked = FALSE;
1855 thread_block(THREAD_CONTINUE_NULL);
1856 goto StartAgain;
1857 }
1858 }
1859 RETURN(KERN_NO_SPACE);
1860 }
1861
1862 /*
1863 * If there are no more entries, we must win.
1864 */
1865
1866 next = entry->vme_next;
1867 if (next == vm_map_to_entry(map))
1868 break;
1869
1870 /*
1871 * If there is another entry, it must be
1872 * after the end of the potential new region.
1873 */
1874
1875 if (next->vme_start >= end)
1876 break;
1877
1878 /*
1879 * Didn't fit -- move to the next entry.
1880 */
1881
1882 entry = next;
1883 start = entry->vme_end;
1884 start = vm_map_round_page(start,
1885 VM_MAP_PAGE_MASK(map));
1886 }
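/*
 * Descriptive note on the search above: this is a first-fit scan.
 * Each candidate start address is rounded up to satisfy both "mask"
 * and the map's page mask, then the range [start, start + size) is
 * checked against the following entry; on a collision the scan
 * resumes at that entry's end. As an illustrative example with
 * hypothetical values, mask == 0xFFF and a hole starting at 0x1234
 * would yield a candidate start of 0x2000 before the size check.
 */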
1887 *address = start;
1888 assert(VM_MAP_PAGE_ALIGNED(*address,
1889 VM_MAP_PAGE_MASK(map)));
1890 } else {
1891 /*
1892 * Verify that:
1893 * the address doesn't itself violate
1894 * the mask requirement.
1895 */
1896
1897 vm_map_lock(map);
1898 map_locked = TRUE;
1899 if ((start & mask) != 0)
1900 RETURN(KERN_NO_SPACE);
1901
1902 /*
1903 * ... the address is within bounds
1904 */
1905
1906 end = start + size;
1907
1908 if ((start < effective_min_offset) ||
1909 (end > effective_max_offset) ||
1910 (start >= end)) {
1911 RETURN(KERN_INVALID_ADDRESS);
1912 }
1913
1914 if (overwrite && zap_old_map != VM_MAP_NULL) {
1915 /*
1916 * Fixed mapping and "overwrite" flag: attempt to
1917 * remove all existing mappings in the specified
1918 * address range, saving them in our "zap_old_map".
1919 */
1920 (void) vm_map_delete(map, start, end,
1921 (VM_MAP_REMOVE_SAVE_ENTRIES |
1922 VM_MAP_REMOVE_NO_MAP_ALIGN),
1923 zap_old_map);
1924 }
1925
1926 /*
1927 * ... the starting address isn't allocated
1928 */
1929
1930 if (vm_map_lookup_entry(map, start, &entry)) {
1931 if (! (flags & VM_FLAGS_ALREADY)) {
1932 RETURN(KERN_NO_SPACE);
1933 }
1934 /*
1935 * Check if what's already there is what we want.
1936 */
1937 tmp_start = start;
1938 tmp_offset = offset;
1939 if (entry->vme_start < start) {
1940 tmp_start -= start - entry->vme_start;
1941 tmp_offset -= start - entry->vme_start;
1942
1943 }
1944 for (; entry->vme_start < end;
1945 entry = entry->vme_next) {
1946 /*
1947 * Check if the mapping's attributes
1948 * match the existing map entry.
1949 */
1950 if (entry == vm_map_to_entry(map) ||
1951 entry->vme_start != tmp_start ||
1952 entry->is_sub_map != is_submap ||
1953 entry->offset != tmp_offset ||
1954 entry->needs_copy != needs_copy ||
1955 entry->protection != cur_protection ||
1956 entry->max_protection != max_protection ||
1957 entry->inheritance != inheritance ||
1958 entry->iokit_acct != iokit_acct ||
1959 entry->alias != alias) {
1960 /* not the same mapping ! */
1961 RETURN(KERN_NO_SPACE);
1962 }
1963 /*
1964 * Check if the same object is being mapped.
1965 */
1966 if (is_submap) {
1967 if (entry->object.sub_map !=
1968 (vm_map_t) object) {
1969 /* not the same submap */
1970 RETURN(KERN_NO_SPACE);
1971 }
1972 } else {
1973 if (entry->object.vm_object != object) {
1974 /* not the same VM object... */
1975 vm_object_t obj2;
1976
1977 obj2 = entry->object.vm_object;
1978 if ((obj2 == VM_OBJECT_NULL ||
1979 obj2->internal) &&
1980 (object == VM_OBJECT_NULL ||
1981 object->internal)) {
1982 /*
1983 * ... but both are
1984 * anonymous memory,
1985 * so equivalent.
1986 */
1987 } else {
1988 RETURN(KERN_NO_SPACE);
1989 }
1990 }
1991 }
1992
1993 tmp_offset += entry->vme_end - entry->vme_start;
1994 tmp_start += entry->vme_end - entry->vme_start;
1995 if (entry->vme_end >= end) {
1996 /* reached the end of our mapping */
1997 break;
1998 }
1999 }
2000 /* it all matches: let's use what's already there ! */
2001 RETURN(KERN_MEMORY_PRESENT);
2002 }
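/*
 * Note on the VM_FLAGS_ALREADY path above: a fixed-address request
 * can succeed with KERN_MEMORY_PRESENT when the requested range is
 * already mapped with identical attributes and the same backing
 * object (or when both sides are anonymous memory), instead of
 * failing with KERN_NO_SPACE.
 */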
2003
2004 /*
2005 * ... the next region doesn't overlap the
2006 * end point.
2007 */
2008
2009 if ((entry->vme_next != vm_map_to_entry(map)) &&
2010 (entry->vme_next->vme_start < end))
2011 RETURN(KERN_NO_SPACE);
2012 }
2013
2014 /*
2015 * At this point,
2016 * "start" and "end" should define the endpoints of the
2017 * available new range, and
2018 * "entry" should refer to the region before the new
2019 * range, and
2020 *
2021 * the map should be locked.
2022 */
2023
2024 /*
2025 * See whether we can avoid creating a new entry (and object) by
2026 * extending one of our neighbors. [So far, we only attempt to
2027 * extend from below.] Note that we can never extend/join
2028 * purgeable objects because they need to remain distinct
2029 * entities in order to implement their "volatile object"
2030 * semantics.
2031 */
2032
2033 if (purgable || entry_for_jit) {
2034 if (object == VM_OBJECT_NULL) {
2035 object = vm_object_allocate(size);
2036 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
2037 object->true_share = TRUE;
2038 if (purgable) {
2039 task_t owner;
2040 object->purgable = VM_PURGABLE_NONVOLATILE;
2041 if (map->pmap == kernel_pmap) {
2042 /*
2043 * Purgeable mappings made in a kernel
2044 * map are "owned" by the kernel itself
2045 * rather than the current user task
2046 * because they're likely to be used by
2047 * more than this user task (see
2048 * execargs_purgeable_allocate(), for
2049 * example).
2050 */
2051 owner = kernel_task;
2052 } else {
2053 owner = current_task();
2054 }
2055 assert(object->vo_purgeable_owner == NULL);
2056 assert(object->resident_page_count == 0);
2057 assert(object->wired_page_count == 0);
2058 vm_object_lock(object);
2059 vm_purgeable_nonvolatile_enqueue(object, owner);
2060 vm_object_unlock(object);
2061 }
2062 offset = (vm_object_offset_t)0;
2063 }
2064 } else if ((is_submap == FALSE) &&
2065 (object == VM_OBJECT_NULL) &&
2066 (entry != vm_map_to_entry(map)) &&
2067 (entry->vme_end == start) &&
2068 (!entry->is_shared) &&
2069 (!entry->is_sub_map) &&
2070 (!entry->in_transition) &&
2071 (!entry->needs_wakeup) &&
2072 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2073 (entry->protection == cur_protection) &&
2074 (entry->max_protection == max_protection) &&
2075 (entry->inheritance == inheritance) &&
2076 ((alias == VM_MEMORY_REALLOC) || (entry->alias == alias)) &&
2077 (entry->no_cache == no_cache) &&
2078 (entry->permanent == permanent) &&
2079 (!entry->superpage_size && !superpage_size) &&
2080 /*
2081 * No coalescing if not map-aligned, to avoid propagating
2082 * that condition any further than needed:
2083 */
2084 (!entry->map_aligned || !clear_map_aligned) &&
2085 (!entry->zero_wired_pages) &&
2086 (!entry->used_for_jit && !entry_for_jit) &&
2087 (entry->iokit_acct == iokit_acct) &&
2088
2089 ((entry->vme_end - entry->vme_start) + size <=
2090 (alias == VM_MEMORY_REALLOC ?
2091 ANON_CHUNK_SIZE :
2092 NO_COALESCE_LIMIT)) &&
2093
2094 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
2095 if (vm_object_coalesce(entry->object.vm_object,
2096 VM_OBJECT_NULL,
2097 entry->offset,
2098 (vm_object_offset_t) 0,
2099 (vm_map_size_t)(entry->vme_end - entry->vme_start),
2100 (vm_map_size_t)(end - entry->vme_end))) {
2101
2102 /*
2103 * Coalesced the two objects - can extend
2104 * the previous map entry to include the
2105 * new range.
2106 */
2107 map->size += (end - entry->vme_end);
2108 assert(entry->vme_start < end);
2109 assert(VM_MAP_PAGE_ALIGNED(end,
2110 VM_MAP_PAGE_MASK(map)));
2111 entry->vme_end = end;
2112 vm_map_store_update_first_free(map, map->first_free);
2113 new_mapping_established = TRUE;
2114 RETURN(KERN_SUCCESS);
2115 }
2116 }
2117
2118 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2119 new_entry = NULL;
2120
2121 for (tmp2_start = start; tmp2_start<end; tmp2_start += step) {
2122 tmp2_end = tmp2_start + step;
2123 /*
2124 * Create a new entry
2125 * LP64todo - for now, we can only allocate 4GB internal objects
2126 * because the default pager can't page bigger ones. Remove this
2127 * when it can.
2128 *
2129 * XXX FBDP
2130 * The reserved "page zero" in each process's address space can
2131 * be arbitrarily large. Splitting it into separate 4GB objects and
2132 * therefore different VM map entries serves no purpose and just
2133 * slows down operations on the VM map, so let's not split the
2134 * allocation into 4GB chunks if the max protection is NONE. That
2135 * memory should never be accessible, so it will never get to the
2136 * default pager.
2137 */
2138 tmp_start = tmp2_start;
2139 if (object == VM_OBJECT_NULL &&
2140 size > (vm_map_size_t)ANON_CHUNK_SIZE &&
2141 max_protection != VM_PROT_NONE &&
2142 superpage_size == 0)
2143 tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE;
2144 else
2145 tmp_end = tmp2_end;
2146 do {
2147 new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end,
2148 object, offset, needs_copy,
2149 FALSE, FALSE,
2150 cur_protection, max_protection,
2151 VM_BEHAVIOR_DEFAULT,
2152 (entry_for_jit)? VM_INHERIT_NONE: inheritance,
2153 0, no_cache,
2154 permanent,
2155 superpage_size,
2156 clear_map_aligned,
2157 is_submap);
2158 new_entry->alias = alias;
2159 if (entry_for_jit){
2160 if (!(map->jit_entry_exists)){
2161 new_entry->used_for_jit = TRUE;
2162 map->jit_entry_exists = TRUE;
2163 }
2164 }
2165
2166 assert(!new_entry->iokit_acct);
2167 if (!is_submap &&
2168 object != VM_OBJECT_NULL &&
2169 object->purgable != VM_PURGABLE_DENY) {
2170 assert(new_entry->use_pmap);
2171 assert(!new_entry->iokit_acct);
2172 /*
2173 * Turn off pmap accounting since
2174 * purgeable objects have their
2175 * own ledgers.
2176 */
2177 new_entry->use_pmap = FALSE;
2178 } else if (!is_submap &&
2179 iokit_acct) {
2180 /* alternate accounting */
2181 assert(!new_entry->iokit_acct);
2182 assert(new_entry->use_pmap);
2183 new_entry->iokit_acct = TRUE;
2184 new_entry->use_pmap = FALSE;
2185 vm_map_iokit_mapped_region(
2186 map,
2187 (new_entry->vme_end -
2188 new_entry->vme_start));
2189 } else if (!is_submap) {
2190 assert(!new_entry->iokit_acct);
2191 assert(new_entry->use_pmap);
2192 }
2193
2194 if (is_submap) {
2195 vm_map_t submap;
2196 boolean_t submap_is_64bit;
2197 boolean_t use_pmap;
2198
2199 assert(new_entry->is_sub_map);
2200 assert(!new_entry->use_pmap);
2201 assert(!new_entry->iokit_acct);
2202 submap = (vm_map_t) object;
2203 submap_is_64bit = vm_map_is_64bit(submap);
2204 use_pmap = (alias == VM_MEMORY_SHARED_PMAP);
2205 #ifndef NO_NESTED_PMAP
2206 if (use_pmap && submap->pmap == NULL) {
2207 ledger_t ledger = map->pmap->ledger;
2208 /* we need a sub pmap to nest... */
2209 submap->pmap = pmap_create(ledger, 0,
2210 submap_is_64bit);
2211 if (submap->pmap == NULL) {
2212 /* let's proceed without nesting... */
2213 }
2214 }
2215 if (use_pmap && submap->pmap != NULL) {
2216 kr = pmap_nest(map->pmap,
2217 submap->pmap,
2218 tmp_start,
2219 tmp_start,
2220 tmp_end - tmp_start);
2221 if (kr != KERN_SUCCESS) {
2222 printf("vm_map_enter: "
2223 "pmap_nest(0x%llx,0x%llx) "
2224 "error 0x%x\n",
2225 (long long)tmp_start,
2226 (long long)tmp_end,
2227 kr);
2228 } else {
2229 /* we're now nested ! */
2230 new_entry->use_pmap = TRUE;
2231 pmap_empty = FALSE;
2232 }
2233 }
2234 #endif /* NO_NESTED_PMAP */
2235 }
2236 entry = new_entry;
2237
2238 if (superpage_size) {
2239 vm_page_t pages, m;
2240 vm_object_t sp_object;
2241
2242 entry->offset = 0;
2243
2244 /* allocate one superpage */
2245 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
2246 if (kr != KERN_SUCCESS) {
2247 new_mapping_established = TRUE; /* will cause deallocation of whole range */
2248 RETURN(kr);
2249 }
2250
2251 /* create one vm_object per superpage */
2252 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
2253 sp_object->phys_contiguous = TRUE;
2254 sp_object->vo_shadow_offset = (vm_object_offset_t)pages->phys_page*PAGE_SIZE;
2255 entry->object.vm_object = sp_object;
2256 assert(entry->use_pmap);
2257
2258 /* enter the base pages into the object */
2259 vm_object_lock(sp_object);
2260 for (offset = 0; offset < SUPERPAGE_SIZE; offset += PAGE_SIZE) {
2261 m = pages;
2262 pmap_zero_page(m->phys_page);
2263 pages = NEXT_PAGE(m);
2264 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2265 vm_page_insert(m, sp_object, offset);
2266 }
2267 vm_object_unlock(sp_object);
2268 }
2269 } while (tmp_end != tmp2_end &&
2270 (tmp_start = tmp_end) &&
2271 (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ?
2272 tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end));
2273 }
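/*
 * Descriptive note on the loop above: for anonymous memory with no
 * backing object, a non-NONE max protection and no superpage, the
 * inner do/while carves the range into ANON_CHUNK_SIZE pieces so
 * that no single internal object exceeds that size; for superpage
 * mappings the outer loop steps by SUPERPAGE_SIZE instead. As a
 * hypothetical example, with a 4GB ANON_CHUNK_SIZE a 10GB anonymous
 * request would produce three map entries (4GB, 4GB and 2GB), each
 * with its own VM object.
 */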
2274
2275 new_mapping_established = TRUE;
2276
2277 BailOut:
2278 assert(map_locked == TRUE);
2279
2280 if (result == KERN_SUCCESS) {
2281 vm_prot_t pager_prot;
2282 memory_object_t pager;
2283
2284 #if DEBUG
2285 if (pmap_empty &&
2286 !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
2287 assert(vm_map_pmap_is_empty(map,
2288 *address,
2289 *address+size));
2290 }
2291 #endif /* DEBUG */
2292
2293 /*
2294 * For "named" VM objects, let the pager know that the
2295 * memory object is being mapped. Some pagers need to keep
2296 * track of this, to know when they can reclaim the memory
2297 * object, for example.
2298 * VM calls memory_object_map() for each mapping (specifying
2299 * the protection of each mapping) and calls
2300 * memory_object_last_unmap() when all the mappings are gone.
2301 */
2302 pager_prot = max_protection;
2303 if (needs_copy) {
2304 /*
2305 * Copy-On-Write mapping: won't modify
2306 * the memory object.
2307 */
2308 pager_prot &= ~VM_PROT_WRITE;
2309 }
2310 if (!is_submap &&
2311 object != VM_OBJECT_NULL &&
2312 object->named &&
2313 object->pager != MEMORY_OBJECT_NULL) {
2314 vm_object_lock(object);
2315 pager = object->pager;
2316 if (object->named &&
2317 pager != MEMORY_OBJECT_NULL) {
2318 assert(object->pager_ready);
2319 vm_object_mapping_wait(object, THREAD_UNINT);
2320 vm_object_mapping_begin(object);
2321 vm_object_unlock(object);
2322
2323 kr = memory_object_map(pager, pager_prot);
2324 assert(kr == KERN_SUCCESS);
2325
2326 vm_object_lock(object);
2327 vm_object_mapping_end(object);
2328 }
2329 vm_object_unlock(object);
2330 }
2331 }
2332
2333 assert(map_locked == TRUE);
2334
2335 if (!keep_map_locked) {
2336 vm_map_unlock(map);
2337 map_locked = FALSE;
2338 }
2339
2340 /*
2341 * We can't hold the map lock if we enter this block.
2342 */
2343
2344 if (result == KERN_SUCCESS) {
2345
2346 /* Wire down the new entry if the user
2347 * requested all new map entries be wired.
2348 */
2349 if ((map->wiring_required)||(superpage_size)) {
2350 assert(!keep_map_locked);
2351 pmap_empty = FALSE; /* pmap won't be empty */
2352 kr = vm_map_wire(map, start, end,
2353 new_entry->protection, TRUE);
2354 result = kr;
2355 }
2356
2357 }
2358
2359 if (result != KERN_SUCCESS) {
2360 if (new_mapping_established) {
2361 /*
2362 * We have to get rid of the new mappings since we
2363 * won't make them available to the user.
2364 * Try to do that atomically, to minimize the risk
2365 * that someone else creates new mappings in that range.
2366 */
2367 zap_new_map = vm_map_create(PMAP_NULL,
2368 *address,
2369 *address + size,
2370 map->hdr.entries_pageable);
2371 vm_map_set_page_shift(zap_new_map,
2372 VM_MAP_PAGE_SHIFT(map));
2373 if (!map_locked) {
2374 vm_map_lock(map);
2375 map_locked = TRUE;
2376 }
2377 (void) vm_map_delete(map, *address, *address+size,
2378 (VM_MAP_REMOVE_SAVE_ENTRIES |
2379 VM_MAP_REMOVE_NO_MAP_ALIGN),
2380 zap_new_map);
2381 }
2382 if (zap_old_map != VM_MAP_NULL &&
2383 zap_old_map->hdr.nentries != 0) {
2384 vm_map_entry_t entry1, entry2;
2385
2386 /*
2387 * The new mapping failed. Attempt to restore
2388 * the old mappings, saved in the "zap_old_map".
2389 */
2390 if (!map_locked) {
2391 vm_map_lock(map);
2392 map_locked = TRUE;
2393 }
2394
2395 /* first check if the coast is still clear */
2396 start = vm_map_first_entry(zap_old_map)->vme_start;
2397 end = vm_map_last_entry(zap_old_map)->vme_end;
2398 if (vm_map_lookup_entry(map, start, &entry1) ||
2399 vm_map_lookup_entry(map, end, &entry2) ||
2400 entry1 != entry2) {
2401 /*
2402 * Part of that range has already been
2403 * re-mapped: we can't restore the old
2404 * mappings...
2405 */
2406 vm_map_enter_restore_failures++;
2407 } else {
2408 /*
2409 * Transfer the saved map entries from
2410 * "zap_old_map" to the original "map",
2411 * inserting them all after "entry1".
2412 */
2413 for (entry2 = vm_map_first_entry(zap_old_map);
2414 entry2 != vm_map_to_entry(zap_old_map);
2415 entry2 = vm_map_first_entry(zap_old_map)) {
2416 vm_map_size_t entry_size;
2417
2418 entry_size = (entry2->vme_end -
2419 entry2->vme_start);
2420 vm_map_store_entry_unlink(zap_old_map,
2421 entry2);
2422 zap_old_map->size -= entry_size;
2423 vm_map_store_entry_link(map, entry1, entry2);
2424 map->size += entry_size;
2425 entry1 = entry2;
2426 }
2427 if (map->wiring_required) {
2428 /*
2429 * XXX TODO: we should rewire the
2430 * old pages here...
2431 */
2432 }
2433 vm_map_enter_restore_successes++;
2434 }
2435 }
2436 }
2437
2438 /*
2439 * The caller is responsible for releasing the lock if it requested to
2440 * keep the map locked.
2441 */
2442 if (map_locked && !keep_map_locked) {
2443 vm_map_unlock(map);
2444 }
2445
2446 /*
2447 * Get rid of the "zap_maps" and all the map entries that
2448 * they may still contain.
2449 */
2450 if (zap_old_map != VM_MAP_NULL) {
2451 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2452 zap_old_map = VM_MAP_NULL;
2453 }
2454 if (zap_new_map != VM_MAP_NULL) {
2455 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
2456 zap_new_map = VM_MAP_NULL;
2457 }
2458
2459 return result;
2460
2461 #undef RETURN
2462 }
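/*
 * Illustrative sketch (hypothetical variable names) of a typical
 * in-kernel call to vm_map_enter(), allocating anonymous pageable
 * memory at any address:
 *
 *	vm_map_offset_t	addr = 0;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_enter(map,
 *			  &addr,		   (out: chosen address)
 *			  size,			   (map-page-rounded size)
 *			  (vm_map_offset_t) 0,	   (no extra alignment mask)
 *			  VM_FLAGS_ANYWHERE,
 *			  VM_OBJECT_NULL,	   (anonymous memory)
 *			  (vm_object_offset_t) 0,
 *			  FALSE,		   (needs_copy)
 *			  VM_PROT_DEFAULT,
 *			  VM_PROT_ALL,
 *			  VM_INHERIT_DEFAULT);
 */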
2463
2464 /*
2465 * Counters for the prefault optimization.
2466 */
2467 int64_t vm_prefault_nb_pages = 0;
2468 int64_t vm_prefault_nb_bailout = 0;
2469
2470 static kern_return_t
2471 vm_map_enter_mem_object_helper(
2472 vm_map_t target_map,
2473 vm_map_offset_t *address,
2474 vm_map_size_t initial_size,
2475 vm_map_offset_t mask,
2476 int flags,
2477 ipc_port_t port,
2478 vm_object_offset_t offset,
2479 boolean_t copy,
2480 vm_prot_t cur_protection,
2481 vm_prot_t max_protection,
2482 vm_inherit_t inheritance,
2483 upl_page_list_ptr_t page_list,
2484 unsigned int page_list_count)
2485 {
2486 vm_map_address_t map_addr;
2487 vm_map_size_t map_size;
2488 vm_object_t object;
2489 vm_object_size_t size;
2490 kern_return_t result;
2491 boolean_t mask_cur_protection, mask_max_protection;
2492 boolean_t try_prefault = (page_list_count != 0);
2493 vm_map_offset_t offset_in_mapping;
2494
2495 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
2496 mask_max_protection = max_protection & VM_PROT_IS_MASK;
2497 cur_protection &= ~VM_PROT_IS_MASK;
2498 max_protection &= ~VM_PROT_IS_MASK;
2499
2500 /*
2501 * Check arguments for validity
2502 */
2503 if ((target_map == VM_MAP_NULL) ||
2504 (cur_protection & ~VM_PROT_ALL) ||
2505 (max_protection & ~VM_PROT_ALL) ||
2506 (inheritance > VM_INHERIT_LAST_VALID) ||
2507 (try_prefault && (copy || !page_list)) ||
2508 initial_size == 0)
2509 return KERN_INVALID_ARGUMENT;
2510
2511 map_addr = vm_map_trunc_page(*address,
2512 VM_MAP_PAGE_MASK(target_map));
2513 map_size = vm_map_round_page(initial_size,
2514 VM_MAP_PAGE_MASK(target_map));
2515 size = vm_object_round_page(initial_size);
2516
2517 /*
2518 * Find the vm object (if any) corresponding to this port.
2519 */
2520 if (!IP_VALID(port)) {
2521 object = VM_OBJECT_NULL;
2522 offset = 0;
2523 copy = FALSE;
2524 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
2525 vm_named_entry_t named_entry;
2526
2527 named_entry = (vm_named_entry_t) port->ip_kobject;
2528
2529 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2530 offset += named_entry->data_offset;
2531 }
2532
2533 /* a few checks to make sure user is obeying rules */
2534 if (size == 0) {
2535 if (offset >= named_entry->size)
2536 return KERN_INVALID_RIGHT;
2537 size = named_entry->size - offset;
2538 }
2539 if (mask_max_protection) {
2540 max_protection &= named_entry->protection;
2541 }
2542 if (mask_cur_protection) {
2543 cur_protection &= named_entry->protection;
2544 }
2545 if ((named_entry->protection & max_protection) !=
2546 max_protection)
2547 return KERN_INVALID_RIGHT;
2548 if ((named_entry->protection & cur_protection) !=
2549 cur_protection)
2550 return KERN_INVALID_RIGHT;
2551 if (offset + size < offset) {
2552 /* overflow */
2553 return KERN_INVALID_ARGUMENT;
2554 }
2555 if (named_entry->size < (offset + size))
2556 return KERN_INVALID_ARGUMENT;
2557
2558 if (named_entry->is_copy) {
2559 /* for a vm_map_copy, we can only map it whole */
2560 if ((size != named_entry->size) &&
2561 (vm_map_round_page(size,
2562 VM_MAP_PAGE_MASK(target_map)) ==
2563 named_entry->size)) {
2564 /* XXX FBDP use the rounded size... */
2565 size = vm_map_round_page(
2566 size,
2567 VM_MAP_PAGE_MASK(target_map));
2568 }
2569
2570 if (!(flags & VM_FLAGS_ANYWHERE) &&
2571 (offset != 0 ||
2572 size != named_entry->size)) {
2573 /*
2574 * XXX for a mapping at a "fixed" address,
2575 * we can't trim after mapping the whole
2576 * memory entry, so reject a request for a
2577 * partial mapping.
2578 */
2579 return KERN_INVALID_ARGUMENT;
2580 }
2581 }
2582
2583 /* The caller's "offset" parameter is relative to the start of the */
2584 /* named entry; convert it to an offset within the backing object. */
2585 offset = offset + named_entry->offset;
2586
2587 if (! VM_MAP_PAGE_ALIGNED(size,
2588 VM_MAP_PAGE_MASK(target_map))) {
2589 /*
2590 * Let's not map more than requested;
2591 * vm_map_enter() will handle this "not map-aligned"
2592 * case.
2593 */
2594 map_size = size;
2595 }
2596
2597 named_entry_lock(named_entry);
2598 if (named_entry->is_sub_map) {
2599 vm_map_t submap;
2600
2601 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2602 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
2603 }
2604
2605 submap = named_entry->backing.map;
2606 vm_map_lock(submap);
2607 vm_map_reference(submap);
2608 vm_map_unlock(submap);
2609 named_entry_unlock(named_entry);
2610
2611 result = vm_map_enter(target_map,
2612 &map_addr,
2613 map_size,
2614 mask,
2615 flags | VM_FLAGS_SUBMAP,
2616 (vm_object_t) submap,
2617 offset,
2618 copy,
2619 cur_protection,
2620 max_protection,
2621 inheritance);
2622 if (result != KERN_SUCCESS) {
2623 vm_map_deallocate(submap);
2624 } else {
2625 /*
2626 * No need to lock "submap" just to check its
2627 * "mapped" flag: that flag is never reset
2628 * once it's been set and if we race, we'll
2629 * just end up setting it twice, which is OK.
2630 */
2631 if (submap->mapped_in_other_pmaps == FALSE &&
2632 vm_map_pmap(submap) != PMAP_NULL &&
2633 vm_map_pmap(submap) !=
2634 vm_map_pmap(target_map)) {
2635 /*
2636 * This submap is being mapped in a map
2637 * that uses a different pmap.
2638 * Set its "mapped_in_other_pmaps" flag
2639 * to indicate that we now need to
2640 * remove mappings from all pmaps rather
2641 * than just the submap's pmap.
2642 */
2643 vm_map_lock(submap);
2644 submap->mapped_in_other_pmaps = TRUE;
2645 vm_map_unlock(submap);
2646 }
2647 *address = map_addr;
2648 }
2649 return result;
2650
2651 } else if (named_entry->is_pager) {
2652 unsigned int access;
2653 vm_prot_t protections;
2654 unsigned int wimg_mode;
2655
2656 protections = named_entry->protection & VM_PROT_ALL;
2657 access = GET_MAP_MEM(named_entry->protection);
2658
2659 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2660 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
2661 }
2662
2663 object = vm_object_enter(named_entry->backing.pager,
2664 named_entry->size,
2665 named_entry->internal,
2666 FALSE,
2667 FALSE);
2668 if (object == VM_OBJECT_NULL) {
2669 named_entry_unlock(named_entry);
2670 return KERN_INVALID_OBJECT;
2671 }
2672
2673 /* JMM - drop reference on pager here */
2674
2675 /* create an extra ref for the named entry */
2676 vm_object_lock(object);
2677 vm_object_reference_locked(object);
2678 named_entry->backing.object = object;
2679 named_entry->is_pager = FALSE;
2680 named_entry_unlock(named_entry);
2681
2682 wimg_mode = object->wimg_bits;
2683
2684 if (access == MAP_MEM_IO) {
2685 wimg_mode = VM_WIMG_IO;
2686 } else if (access == MAP_MEM_COPYBACK) {
2687 wimg_mode = VM_WIMG_USE_DEFAULT;
2688 } else if (access == MAP_MEM_INNERWBACK) {
2689 wimg_mode = VM_WIMG_INNERWBACK;
2690 } else if (access == MAP_MEM_WTHRU) {
2691 wimg_mode = VM_WIMG_WTHRU;
2692 } else if (access == MAP_MEM_WCOMB) {
2693 wimg_mode = VM_WIMG_WCOMB;
2694 }
2695
2696 /* wait for object (if any) to be ready */
2697 if (!named_entry->internal) {
2698 while (!object->pager_ready) {
2699 vm_object_wait(
2700 object,
2701 VM_OBJECT_EVENT_PAGER_READY,
2702 THREAD_UNINT);
2703 vm_object_lock(object);
2704 }
2705 }
2706
2707 if (object->wimg_bits != wimg_mode)
2708 vm_object_change_wimg_mode(object, wimg_mode);
2709
2710 #if VM_OBJECT_TRACKING_OP_TRUESHARE
2711 if (!object->true_share &&
2712 vm_object_tracking_inited) {
2713 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
2714 int num = 0;
2715
2716 num = OSBacktrace(bt,
2717 VM_OBJECT_TRACKING_BTDEPTH);
2718 btlog_add_entry(vm_object_tracking_btlog,
2719 object,
2720 VM_OBJECT_TRACKING_OP_TRUESHARE,
2721 bt,
2722 num);
2723 }
2724 #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
2725
2726 object->true_share = TRUE;
2727
2728 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
2729 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
2730 vm_object_unlock(object);
2731
2732 } else if (named_entry->is_copy) {
2733 kern_return_t kr;
2734 vm_map_copy_t copy_map;
2735 vm_map_entry_t copy_entry;
2736 vm_map_offset_t copy_addr;
2737
2738 if (flags & ~(VM_FLAGS_FIXED |
2739 VM_FLAGS_ANYWHERE |
2740 VM_FLAGS_OVERWRITE |
2741 VM_FLAGS_RETURN_DATA_ADDR)) {
2742 named_entry_unlock(named_entry);
2743 return KERN_INVALID_ARGUMENT;
2744 }
2745
2746 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2747 offset_in_mapping = offset - vm_object_trunc_page(offset);
2748 offset = vm_object_trunc_page(offset);
2749 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
2750 }
2751
2752 copy_map = named_entry->backing.copy;
2753 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
2754 if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
2755 /* unsupported type; should not happen */
2756 printf("vm_map_enter_mem_object: "
2757 "memory_entry->backing.copy "
2758 "unsupported type 0x%x\n",
2759 copy_map->type);
2760 named_entry_unlock(named_entry);
2761 return KERN_INVALID_ARGUMENT;
2762 }
2763
2764 /* reserve a contiguous range */
2765 kr = vm_map_enter(target_map,
2766 &map_addr,
2767 /* map whole mem entry, trim later: */
2768 named_entry->size,
2769 mask,
2770 flags & (VM_FLAGS_ANYWHERE |
2771 VM_FLAGS_OVERWRITE |
2772 VM_FLAGS_RETURN_DATA_ADDR),
2773 VM_OBJECT_NULL,
2774 0,
2775 FALSE, /* copy */
2776 cur_protection,
2777 max_protection,
2778 inheritance);
2779 if (kr != KERN_SUCCESS) {
2780 named_entry_unlock(named_entry);
2781 return kr;
2782 }
2783
2784 copy_addr = map_addr;
2785
2786 for (copy_entry = vm_map_copy_first_entry(copy_map);
2787 copy_entry != vm_map_copy_to_entry(copy_map);
2788 copy_entry = copy_entry->vme_next) {
2789 int remap_flags = 0;
2790 vm_map_t copy_submap;
2791 vm_object_t copy_object;
2792 vm_map_size_t copy_size;
2793 vm_object_offset_t copy_offset;
2794
2795 copy_offset = copy_entry->offset;
2796 copy_size = (copy_entry->vme_end -
2797 copy_entry->vme_start);
2798
2799 /* sanity check */
2800 if ((copy_addr + copy_size) >
2801 (map_addr +
2802 named_entry->size /* XXX full size */ )) {
2803 /* over-mapping too much !? */
2804 kr = KERN_INVALID_ARGUMENT;
2805 /* abort */
2806 break;
2807 }
2808
2809 /* take a reference on the object */
2810 if (copy_entry->is_sub_map) {
2811 remap_flags |= VM_FLAGS_SUBMAP;
2812 copy_submap =
2813 copy_entry->object.sub_map;
2814 vm_map_lock(copy_submap);
2815 vm_map_reference(copy_submap);
2816 vm_map_unlock(copy_submap);
2817 copy_object = (vm_object_t) copy_submap;
2818 } else {
2819 copy_object =
2820 copy_entry->object.vm_object;
2821 vm_object_reference(copy_object);
2822 }
2823
2824 /* over-map the object into destination */
2825 remap_flags |= flags;
2826 remap_flags |= VM_FLAGS_FIXED;
2827 remap_flags |= VM_FLAGS_OVERWRITE;
2828 remap_flags &= ~VM_FLAGS_ANYWHERE;
2829 kr = vm_map_enter(target_map,
2830 &copy_addr,
2831 copy_size,
2832 (vm_map_offset_t) 0,
2833 remap_flags,
2834 copy_object,
2835 copy_offset,
2836 copy,
2837 cur_protection,
2838 max_protection,
2839 inheritance);
2840 if (kr != KERN_SUCCESS) {
2841 if (copy_entry->is_sub_map) {
2842 vm_map_deallocate(copy_submap);
2843 } else {
2844 vm_object_deallocate(copy_object);
2845 }
2846 /* abort */
2847 break;
2848 }
2849
2850 /* next mapping */
2851 copy_addr += copy_size;
2852 }
2853
2854 if (kr == KERN_SUCCESS) {
2855 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2856 *address = map_addr + offset_in_mapping;
2857 } else {
2858 *address = map_addr;
2859 }
2860
2861 if (offset) {
2862 /*
2863 * Trim in front, from 0 to "offset".
2864 */
2865 vm_map_remove(target_map,
2866 map_addr,
2867 map_addr + offset,
2868 0);
2869 *address += offset;
2870 }
2871 if (offset + map_size < named_entry->size) {
2872 /*
2873 * Trim in back, from
2874 * "offset + map_size" to
2875 * "named_entry->size".
2876 */
2877 vm_map_remove(target_map,
2878 (map_addr +
2879 offset + map_size),
2880 (map_addr +
2881 named_entry->size),
2882 0);
2883 }
2884 }
2885 named_entry_unlock(named_entry);
2886
2887 if (kr != KERN_SUCCESS) {
2888 if (! (flags & VM_FLAGS_OVERWRITE)) {
2889 /* deallocate the contiguous range */
2890 (void) vm_deallocate(target_map,
2891 map_addr,
2892 map_size);
2893 }
2894 }
2895
2896 return kr;
2897
2898 } else {
2899 /* This is the case where we are going to map */
2900 /* an already mapped object. If the object is */
2901 /* not ready, it is internal. An external */
2902 /* object cannot be mapped until it is ready, */
2903 /* so we can avoid the ready check in */
2904 /* this case. */
2905 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2906 offset_in_mapping = offset - vm_object_trunc_page(offset);
2907 offset = vm_object_trunc_page(offset);
2908 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
2909 }
2910
2911 object = named_entry->backing.object;
2912 assert(object != VM_OBJECT_NULL);
2913 named_entry_unlock(named_entry);
2914 vm_object_reference(object);
2915 }
2916 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
2917 /*
2918 * JMM - This is temporary until we unify named entries
2919 * and raw memory objects.
2920 *
2921 * Detected fake ip_kotype for a memory object. In
2922 * this case, the port isn't really a port at all, but
2923 * instead is just a raw memory object.
2924 */
2925 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
2926 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
2927 }
2928
2929 object = vm_object_enter((memory_object_t)port,
2930 size, FALSE, FALSE, FALSE);
2931 if (object == VM_OBJECT_NULL)
2932 return KERN_INVALID_OBJECT;
2933
2934 /* wait for object (if any) to be ready */
2935 if (object != VM_OBJECT_NULL) {
2936 if (object == kernel_object) {
2937 printf("Warning: Attempt to map kernel object"
2938 " by a non-private kernel entity\n");
2939 return KERN_INVALID_OBJECT;
2940 }
2941 if (!object->pager_ready) {
2942 vm_object_lock(object);
2943
2944 while (!object->pager_ready) {
2945 vm_object_wait(object,
2946 VM_OBJECT_EVENT_PAGER_READY,
2947 THREAD_UNINT);
2948 vm_object_lock(object);
2949 }
2950 vm_object_unlock(object);
2951 }
2952 }
2953 } else {
2954 return KERN_INVALID_OBJECT;
2955 }
2956
2957 if (object != VM_OBJECT_NULL &&
2958 object->named &&
2959 object->pager != MEMORY_OBJECT_NULL &&
2960 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2961 memory_object_t pager;
2962 vm_prot_t pager_prot;
2963 kern_return_t kr;
2964
2965 /*
2966 * For "named" VM objects, let the pager know that the
2967 * memory object is being mapped. Some pagers need to keep
2968 * track of this, to know when they can reclaim the memory
2969 * object, for example.
2970 * VM calls memory_object_map() for each mapping (specifying
2971 * the protection of each mapping) and calls
2972 * memory_object_last_unmap() when all the mappings are gone.
2973 */
2974 pager_prot = max_protection;
2975 if (copy) {
2976 /*
2977 * Copy-On-Write mapping: won't modify the
2978 * memory object.
2979 */
2980 pager_prot &= ~VM_PROT_WRITE;
2981 }
2982 vm_object_lock(object);
2983 pager = object->pager;
2984 if (object->named &&
2985 pager != MEMORY_OBJECT_NULL &&
2986 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
2987 assert(object->pager_ready);
2988 vm_object_mapping_wait(object, THREAD_UNINT);
2989 vm_object_mapping_begin(object);
2990 vm_object_unlock(object);
2991
2992 kr = memory_object_map(pager, pager_prot);
2993 assert(kr == KERN_SUCCESS);
2994
2995 vm_object_lock(object);
2996 vm_object_mapping_end(object);
2997 }
2998 vm_object_unlock(object);
2999 }
3000
3001 /*
3002 * Perform the copy if requested
3003 */
3004
3005 if (copy) {
3006 vm_object_t new_object;
3007 vm_object_offset_t new_offset;
3008
3009 result = vm_object_copy_strategically(object, offset, size,
3010 &new_object, &new_offset,
3011 &copy);
3012
3013
3014 if (result == KERN_MEMORY_RESTART_COPY) {
3015 boolean_t success;
3016 boolean_t src_needs_copy;
3017
3018 /*
3019 * XXX
3020 * We currently ignore src_needs_copy.
3021 * This really is the issue of how to make
3022 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
3023 * non-kernel users to use. Solution forthcoming.
3024 * In the meantime, since we don't allow non-kernel
3025 * memory managers to specify symmetric copy,
3026 * we won't run into problems here.
3027 */
3028 new_object = object;
3029 new_offset = offset;
3030 success = vm_object_copy_quickly(&new_object,
3031 new_offset, size,
3032 &src_needs_copy,
3033 &copy);
3034 assert(success);
3035 result = KERN_SUCCESS;
3036 }
3037 /*
3038 * Throw away the reference to the
3039 * original object, as it won't be mapped.
3040 */
3041
3042 vm_object_deallocate(object);
3043
3044 if (result != KERN_SUCCESS)
3045 return result;
3046
3047 object = new_object;
3048 offset = new_offset;
3049 }
3050
3051 /*
3052 * If users want to try to prefault pages, the mapping and prefault
3053 * needs to be atomic.
3054 */
3055 if (try_prefault)
3056 flags |= VM_FLAGS_KEEP_MAP_LOCKED;
3057 result = vm_map_enter(target_map,
3058 &map_addr, map_size,
3059 (vm_map_offset_t)mask,
3060 flags,
3061 object, offset,
3062 copy,
3063 cur_protection, max_protection, inheritance);
3064 if (result != KERN_SUCCESS)
3065 vm_object_deallocate(object);
3066
3067 /*
3068 * Try to prefault, and do not forget to release the vm map lock.
3069 */
3070 if (result == KERN_SUCCESS && try_prefault) {
3071 mach_vm_address_t va = map_addr;
3072 kern_return_t kr = KERN_SUCCESS;
3073 unsigned int i = 0;
3074
3075 for (i = 0; i < page_list_count; ++i) {
3076 if (UPL_VALID_PAGE(page_list, i)) {
3077 /*
3078 * If this call fails, we should stop trying
3079 * to optimize; subsequent calls are likely
3080 * to fail too.
3081 *
3082 * We do not report an error for such a
3083 * failure, though: prefaulting is an
3084 * optimization, not something critical.
3085 */
3086 kr = pmap_enter_options(target_map->pmap,
3087 va, UPL_PHYS_PAGE(page_list, i),
3088 cur_protection, VM_PROT_NONE,
3089 0, TRUE, PMAP_OPTIONS_NOWAIT, NULL);
3090 if (kr != KERN_SUCCESS) {
3091 OSIncrementAtomic64(&vm_prefault_nb_bailout);
3092 goto BailOut;
3093 }
3094 OSIncrementAtomic64(&vm_prefault_nb_pages);
3095 }
3096
3097 /* Next virtual address */
3098 va += PAGE_SIZE;
3099 }
3100 BailOut:
3101 vm_map_unlock(target_map);
3102 }
3103
3104 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
3105 *address = map_addr + offset_in_mapping;
3106 } else {
3107 *address = map_addr;
3108 }
3109 return result;
3110 }
3111
3112 kern_return_t
3113 vm_map_enter_mem_object(
3114 vm_map_t target_map,
3115 vm_map_offset_t *address,
3116 vm_map_size_t initial_size,
3117 vm_map_offset_t mask,
3118 int flags,
3119 ipc_port_t port,
3120 vm_object_offset_t offset,
3121 boolean_t copy,
3122 vm_prot_t cur_protection,
3123 vm_prot_t max_protection,
3124 vm_inherit_t inheritance)
3125 {
3126 return vm_map_enter_mem_object_helper(target_map, address, initial_size, mask, flags,
3127 port, offset, copy, cur_protection, max_protection,
3128 inheritance, NULL, 0);
3129 }
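/*
 * Illustrative sketch (hypothetical variable names): this entry
 * point is typically reached from mach_vm_map()-style callers that
 * name memory by an ipc_port_t (a named entry or raw memory object).
 * Mapping a named entry read-only at any address might look like:
 *
 *	vm_map_offset_t	addr = 0;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_enter_mem_object(map, &addr, size,
 *				     (vm_map_offset_t) 0,
 *				     VM_FLAGS_ANYWHERE,
 *				     mem_entry_port,
 *				     (vm_object_offset_t) 0,
 *				     FALSE,	      (copy)
 *				     VM_PROT_READ,
 *				     VM_PROT_READ,
 *				     VM_INHERIT_DEFAULT);
 */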
3130
3131 kern_return_t
3132 vm_map_enter_mem_object_prefault(
3133 vm_map_t target_map,
3134 vm_map_offset_t *address,
3135 vm_map_size_t initial_size,
3136 vm_map_offset_t mask,
3137 int flags,
3138 ipc_port_t port,
3139 vm_object_offset_t offset,
3140 vm_prot_t cur_protection,
3141 vm_prot_t max_protection,
3142 upl_page_list_ptr_t page_list,
3143 unsigned int page_list_count)
3144 {
3145 return vm_map_enter_mem_object_helper(target_map, address, initial_size, mask, flags,
3146 port, offset, FALSE, cur_protection, max_protection,
3147 VM_INHERIT_DEFAULT, page_list, page_list_count);
3148 }
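/*
 * Descriptive note: the prefault variant differs from
 * vm_map_enter_mem_object() only in that it never copies, always
 * uses VM_INHERIT_DEFAULT, and supplies a UPL page list so the
 * helper can opportunistically enter those physical pages into the
 * pmap while the map lock is still held.
 */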
3149
3150
3151 kern_return_t
3152 vm_map_enter_mem_object_control(
3153 vm_map_t target_map,
3154 vm_map_offset_t *address,
3155 vm_map_size_t initial_size,
3156 vm_map_offset_t mask,
3157 int flags,
3158 memory_object_control_t control,
3159 vm_object_offset_t offset,
3160 boolean_t copy,
3161 vm_prot_t cur_protection,
3162 vm_prot_t max_protection,
3163 vm_inherit_t inheritance)
3164 {
3165 vm_map_address_t map_addr;
3166 vm_map_size_t map_size;
3167 vm_object_t object;
3168 vm_object_size_t size;
3169 kern_return_t result;
3170 memory_object_t pager;
3171 vm_prot_t pager_prot;
3172 kern_return_t kr;
3173
3174 /*
3175 * Check arguments for validity
3176 */
3177 if ((target_map == VM_MAP_NULL) ||
3178 (cur_protection & ~VM_PROT_ALL) ||
3179 (max_protection & ~VM_PROT_ALL) ||
3180 (inheritance > VM_INHERIT_LAST_VALID) ||
3181 initial_size == 0)
3182 return KERN_INVALID_ARGUMENT;
3183
3184 map_addr = vm_map_trunc_page(*address,
3185 VM_MAP_PAGE_MASK(target_map));
3186 map_size = vm_map_round_page(initial_size,
3187 VM_MAP_PAGE_MASK(target_map));
3188 size = vm_object_round_page(initial_size);
3189
3190 object = memory_object_control_to_vm_object(control);
3191
3192 if (object == VM_OBJECT_NULL)
3193 return KERN_INVALID_OBJECT;
3194
3195 if (object == kernel_object) {
3196 printf("Warning: Attempt to map kernel object"
3197 " by a non-private kernel entity\n");
3198 return KERN_INVALID_OBJECT;
3199 }
3200
3201 vm_object_lock(object);
3202 object->ref_count++;
3203 vm_object_res_reference(object);
3204
3205 /*
3206 * For "named" VM objects, let the pager know that the
3207 * memory object is being mapped. Some pagers need to keep
3208 * track of this, to know when they can reclaim the memory
3209 * object, for example.
3210 * VM calls memory_object_map() for each mapping (specifying
3211 * the protection of each mapping) and calls
3212 * memory_object_last_unmap() when all the mappings are gone.
3213 */
3214 pager_prot = max_protection;
3215 if (copy) {
3216 pager_prot &= ~VM_PROT_WRITE;
3217 }
3218 pager = object->pager;
3219 if (object->named &&
3220 pager != MEMORY_OBJECT_NULL &&
3221 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
3222 assert(object->pager_ready);
3223 vm_object_mapping_wait(object, THREAD_UNINT);
3224 vm_object_mapping_begin(object);
3225 vm_object_unlock(object);
3226
3227 kr = memory_object_map(pager, pager_prot);
3228 assert(kr == KERN_SUCCESS);
3229
3230 vm_object_lock(object);
3231 vm_object_mapping_end(object);
3232 }
3233 vm_object_unlock(object);
3234
3235 /*
3236 * Perform the copy if requested
3237 */
3238
3239 if (copy) {
3240 vm_object_t new_object;
3241 vm_object_offset_t new_offset;
3242
3243 result = vm_object_copy_strategically(object, offset, size,
3244 &new_object, &new_offset,
3245 &copy);
3246
3247
3248 if (result == KERN_MEMORY_RESTART_COPY) {
3249 boolean_t success;
3250 boolean_t src_needs_copy;
3251
3252 /*
3253 * XXX
3254 * We currently ignore src_needs_copy.
3255 * This really is the issue of how to make
3256 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
3257 * non-kernel users to use. Solution forthcoming.
3258 * In the meantime, since we don't allow non-kernel
3259 * memory managers to specify symmetric copy,
3260 * we won't run into problems here.
3261 */
3262 new_object = object;
3263 new_offset = offset;
3264 success = vm_object_copy_quickly(&new_object,
3265 new_offset, size,
3266 &src_needs_copy,
3267 &copy);
3268 assert(success);
3269 result = KERN_SUCCESS;
3270 }
3271 /*
3272 * Throw away the reference to the
3273 * original object, as it won't be mapped.
3274 */
3275
3276 vm_object_deallocate(object);
3277
3278 if (result != KERN_SUCCESS)
3279 return result;
3280
3281 object = new_object;
3282 offset = new_offset;
3283 }
3284
3285 result = vm_map_enter(target_map,
3286 &map_addr, map_size,
3287 (vm_map_offset_t)mask,
3288 flags,
3289 object, offset,
3290 copy,
3291 cur_protection, max_protection, inheritance);
3292 if (result != KERN_SUCCESS)
3293 vm_object_deallocate(object);
3294 *address = map_addr;
3295
3296 return result;
3297 }
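/*
 * Descriptive note: this variant mirrors vm_map_enter_mem_object(),
 * except that the memory is named by a memory_object_control_t
 * rather than an ipc_port_t, so the VM object is obtained directly
 * from the control and given an extra reference before mapping.
 */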
3298
3299
3300 #if VM_CPM
3301
3302 #ifdef MACH_ASSERT
3303 extern pmap_paddr_t avail_start, avail_end;
3304 #endif
3305
3306 /*
3307 * Allocate memory in the specified map, with the caveat that
3308 * the memory is physically contiguous. This call may fail
3309 * if the system can't find sufficient contiguous memory.
3310 * This call may cause or lead to heart-stopping amounts of
3311 * paging activity.
3312 *
3313 * Memory obtained from this call should be freed in the
3314 * normal way, viz., via vm_deallocate.
3315 */
3316 kern_return_t
3317 vm_map_enter_cpm(
3318 vm_map_t map,
3319 vm_map_offset_t *addr,
3320 vm_map_size_t size,
3321 int flags)
3322 {
3323 vm_object_t cpm_obj;
3324 pmap_t pmap;
3325 vm_page_t m, pages;
3326 kern_return_t kr;
3327 vm_map_offset_t va, start, end, offset;
3328 #if MACH_ASSERT
3329 vm_map_offset_t prev_addr = 0;
3330 #endif /* MACH_ASSERT */
3331
3332 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
3333
3334 if (size == 0) {
3335 *addr = 0;
3336 return KERN_SUCCESS;
3337 }
3338 if (anywhere)
3339 *addr = vm_map_min(map);
3340 else
3341 *addr = vm_map_trunc_page(*addr,
3342 VM_MAP_PAGE_MASK(map));
3343 size = vm_map_round_page(size,
3344 VM_MAP_PAGE_MASK(map));
3345
3346 /*
3347 * LP64todo - cpm_allocate should probably allow
3348 * allocations of >4GB, but not with the current
3349 * algorithm, so just cast down the size for now.
3350 */
3351 if (size > VM_MAX_ADDRESS)
3352 return KERN_RESOURCE_SHORTAGE;
3353 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
3354 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)
3355 return kr;
3356
3357 cpm_obj = vm_object_allocate((vm_object_size_t)size);
3358 assert(cpm_obj != VM_OBJECT_NULL);
3359 assert(cpm_obj->internal);
3360 assert(cpm_obj->vo_size == (vm_object_size_t)size);
3361 assert(cpm_obj->can_persist == FALSE);
3362 assert(cpm_obj->pager_created == FALSE);
3363 assert(cpm_obj->pageout == FALSE);
3364 assert(cpm_obj->shadow == VM_OBJECT_NULL);
3365
3366 /*
3367 * Insert pages into object.
3368 */
3369
3370 vm_object_lock(cpm_obj);
3371 for (offset = 0; offset < size; offset += PAGE_SIZE) {
3372 m = pages;
3373 pages = NEXT_PAGE(m);
3374 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
3375
3376 assert(!m->gobbled);
3377 assert(!m->wanted);
3378 assert(!m->pageout);
3379 assert(!m->tabled);
3380 assert(VM_PAGE_WIRED(m));
3381 /*
3382 * ENCRYPTED SWAP:
3383 * "m" is not supposed to be pageable, so it
3384 * should not be encrypted. It wouldn't be safe
3385 * to enter it in a new VM object while encrypted.
3386 */
3387 ASSERT_PAGE_DECRYPTED(m);
3388 assert(m->busy);
3389 assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT));
3390
3391 m->busy = FALSE;
3392 vm_page_insert(m, cpm_obj, offset);
3393 }
3394 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
3395 vm_object_unlock(cpm_obj);
3396
3397 /*
3398 * Hang onto a reference on the object in case a
3399 * multi-threaded application for some reason decides
3400 * to deallocate the portion of the address space into
3401 * which we will insert this object.
3402 *
3403 * Unfortunately, we must insert the object now before
3404 * we can talk to the pmap module about which addresses
3405 * must be wired down. Hence, the race with a multi-
3406 * threaded app.
3407 */
3408 vm_object_reference(cpm_obj);
3409
3410 /*
3411 * Insert object into map.
3412 */
3413
3414 kr = vm_map_enter(
3415 map,
3416 addr,
3417 size,
3418 (vm_map_offset_t)0,
3419 flags,
3420 cpm_obj,
3421 (vm_object_offset_t)0,
3422 FALSE,
3423 VM_PROT_ALL,
3424 VM_PROT_ALL,
3425 VM_INHERIT_DEFAULT);
3426
3427 if (kr != KERN_SUCCESS) {
3428 /*
3429 * A CPM object doesn't have can_persist set,
3430 * so all we have to do is deallocate it to
3431 * free up these pages.
3432 */
3433 assert(cpm_obj->pager_created == FALSE);
3434 assert(cpm_obj->can_persist == FALSE);
3435 assert(cpm_obj->pageout == FALSE);
3436 assert(cpm_obj->shadow == VM_OBJECT_NULL);
3437 vm_object_deallocate(cpm_obj); /* kill acquired ref */
3438 vm_object_deallocate(cpm_obj); /* kill creation ref */
3439 }
3440
3441 /*
3442 * Inform the physical mapping system that the
3443 * range of addresses may not fault, so that
3444 * page tables and such can be locked down as well.
3445 */
3446 start = *addr;
3447 end = start + size;
3448 pmap = vm_map_pmap(map);
3449 pmap_pageable(pmap, start, end, FALSE);
3450
3451 /*
3452 * Enter each page into the pmap, to avoid faults.
3453 * Note that this loop could be coded more efficiently,
3454 * if the need arose, rather than looking up each page
3455 * again.
3456 */
3457 for (offset = 0, va = start; offset < size;
3458 va += PAGE_SIZE, offset += PAGE_SIZE) {
3459 int type_of_fault;
3460
3461 vm_object_lock(cpm_obj);
3462 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
3463 assert(m != VM_PAGE_NULL);
3464
3465 vm_page_zero_fill(m);
3466
3467 type_of_fault = DBG_ZERO_FILL_FAULT;
3468
3469 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
3470 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, 0, NULL,
3471 &type_of_fault);
3472
3473 vm_object_unlock(cpm_obj);
3474 }
3475
3476 #if MACH_ASSERT
3477 /*
3478 * Verify ordering in address space.
3479 */
3480 for (offset = 0; offset < size; offset += PAGE_SIZE) {
3481 vm_object_lock(cpm_obj);
3482 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
3483 vm_object_unlock(cpm_obj);
3484 if (m == VM_PAGE_NULL)
3485 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
3486 cpm_obj, (uint64_t)offset);
3487 assert(m->tabled);
3488 assert(!m->busy);
3489 assert(!m->wanted);
3490 assert(!m->fictitious);
3491 assert(!m->private);
3492 assert(!m->absent);
3493 assert(!m->error);
3494 assert(!m->cleaning);
3495 assert(!m->laundry);
3496 assert(!m->precious);
3497 assert(!m->clustered);
3498 if (offset != 0) {
3499 if (m->phys_page != prev_addr + 1) {
3500 printf("start 0x%llx end 0x%llx va 0x%llx\n",
3501 (uint64_t)start, (uint64_t)end, (uint64_t)va);
3502 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
3503 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
3504 panic("vm_allocate_cpm: pages not contig!");
3505 }
3506 }
3507 prev_addr = m->phys_page;
3508 }
3509 #endif /* MACH_ASSERT */
3510
3511 vm_object_deallocate(cpm_obj); /* kill extra ref */
3512
3513 return kr;
3514 }
3515
3516
3517 #else /* VM_CPM */
3518
3519 /*
3520 * Interface is defined in all cases, but unless the kernel
3521 * is built explicitly for this option, the interface does
3522 * nothing.
3523 */
3524
3525 kern_return_t
3526 vm_map_enter_cpm(
3527 __unused vm_map_t map,
3528 __unused vm_map_offset_t *addr,
3529 __unused vm_map_size_t size,
3530 __unused int flags)
3531 {
3532 return KERN_FAILURE;
3533 }
3534 #endif /* VM_CPM */
3535
3536 /* Not used without nested pmaps */
3537 #ifndef NO_NESTED_PMAP
3538 /*
3539 * Clip and unnest a portion of a nested submap mapping.
3540 */
3541
3542
3543 static void
3544 vm_map_clip_unnest(
3545 vm_map_t map,
3546 vm_map_entry_t entry,
3547 vm_map_offset_t start_unnest,
3548 vm_map_offset_t end_unnest)
3549 {
3550 vm_map_offset_t old_start_unnest = start_unnest;
3551 vm_map_offset_t old_end_unnest = end_unnest;
3552
3553 assert(entry->is_sub_map);
3554 assert(entry->object.sub_map != NULL);
3555 assert(entry->use_pmap);
3556
3557 /*
3558 * Query the platform for the optimal unnest range.
3559 * DRK: There's some duplication of effort here, since
3560 * callers may have adjusted the range to some extent. This
3561 * routine was introduced to support 1GiB subtree nesting
3562 * for x86 platforms, which can also nest on 2MiB boundaries
3563 * depending on size/alignment.
3564 */
3565 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
3566 log_unnest_badness(map, old_start_unnest, old_end_unnest);
3567 }
3568
3569 if (entry->vme_start > start_unnest ||
3570 entry->vme_end < end_unnest) {
3571 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
3572 "bad nested entry: start=0x%llx end=0x%llx\n",
3573 (long long)start_unnest, (long long)end_unnest,
3574 (long long)entry->vme_start, (long long)entry->vme_end);
3575 }
3576
3577 if (start_unnest > entry->vme_start) {
3578 _vm_map_clip_start(&map->hdr,
3579 entry,
3580 start_unnest);
3581 vm_map_store_update_first_free(map, map->first_free);
3582 }
3583 if (entry->vme_end > end_unnest) {
3584 _vm_map_clip_end(&map->hdr,
3585 entry,
3586 end_unnest);
3587 vm_map_store_update_first_free(map, map->first_free);
3588 }
3589
3590 pmap_unnest(map->pmap,
3591 entry->vme_start,
3592 entry->vme_end - entry->vme_start);
3593 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
3594 /* clean up parent map/maps */
3595 vm_map_submap_pmap_clean(
3596 map, entry->vme_start,
3597 entry->vme_end,
3598 entry->object.sub_map,
3599 entry->offset);
3600 }
3601 entry->use_pmap = FALSE;
3602 if (entry->alias == VM_MEMORY_SHARED_PMAP) {
3603 entry->alias = VM_MEMORY_UNSHARED_PMAP;
3604 }
3605 }
3606 #endif /* NO_NESTED_PMAP */
3607
3608 /*
3609 * vm_map_clip_start: [ internal use only ]
3610 *
3611 * Asserts that the given entry begins at or after
3612 * the specified address; if necessary,
3613 * it splits the entry into two.
3614 */
3615 void
3616 vm_map_clip_start(
3617 vm_map_t map,
3618 vm_map_entry_t entry,
3619 vm_map_offset_t startaddr)
3620 {
3621 #ifndef NO_NESTED_PMAP
3622 if (entry->is_sub_map &&
3623 entry->use_pmap &&
3624 startaddr >= entry->vme_start) {
3625 vm_map_offset_t start_unnest, end_unnest;
3626
3627 /*
3628 * Make sure "startaddr" is no longer in a nested range
3629 * before we clip. Unnest only the minimum range the platform
3630 * can handle.
3631 * vm_map_clip_unnest may perform additional adjustments to
3632 * the unnest range.
3633 */
3634 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
3635 end_unnest = start_unnest + pmap_nesting_size_min;
3636 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
3637 }
3638 #endif /* NO_NESTED_PMAP */
3639 if (startaddr > entry->vme_start) {
3640 if (entry->object.vm_object &&
3641 !entry->is_sub_map &&
3642 entry->object.vm_object->phys_contiguous) {
3643 pmap_remove(map->pmap,
3644 (addr64_t)(entry->vme_start),
3645 (addr64_t)(entry->vme_end));
3646 }
3647 _vm_map_clip_start(&map->hdr, entry, startaddr);
3648 vm_map_store_update_first_free(map, map->first_free);
3649 }
3650 }
3651
3652
3653 #define vm_map_copy_clip_start(copy, entry, startaddr) \
3654 MACRO_BEGIN \
3655 if ((startaddr) > (entry)->vme_start) \
3656 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
3657 MACRO_END
3658
3659 /*
3660 * This routine is called only when it is known that
3661 * the entry must be split.
3662 */
3663 static void
3664 _vm_map_clip_start(
3665 register struct vm_map_header *map_header,
3666 register vm_map_entry_t entry,
3667 register vm_map_offset_t start)
3668 {
3669 register vm_map_entry_t new_entry;
3670
3671 /*
3672 * Split off the front portion --
3673 * note that we must insert the new
3674 * entry BEFORE this one, so that
3675 * this entry has the specified starting
3676 * address.
3677 */
3678
3679 if (entry->map_aligned) {
3680 assert(VM_MAP_PAGE_ALIGNED(start,
3681 VM_MAP_HDR_PAGE_MASK(map_header)));
3682 }
3683
3684 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
3685 vm_map_entry_copy_full(new_entry, entry);
3686
3687 new_entry->vme_end = start;
3688 assert(new_entry->vme_start < new_entry->vme_end);
3689 entry->offset += (start - entry->vme_start);
3690 assert(start < entry->vme_end);
3691 entry->vme_start = start;
3692
3693 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
3694
3695 if (entry->is_sub_map)
3696 vm_map_reference(new_entry->object.sub_map);
3697 else
3698 vm_object_reference(new_entry->object.vm_object);
3699 }
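/*
 * Worked example with hypothetical addresses: clipping an entry that
 * covers [0x1000, 0x5000) at start == 0x3000 inserts a new entry for
 * [0x1000, 0x3000) BEFORE the original, while the original entry is
 * trimmed to [0x3000, 0x5000) and its object offset is advanced by
 * 0x2000. Both entries reference the same object (or submap), hence
 * the extra reference taken above.
 */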
3700
3701
3702 /*
3703 * vm_map_clip_end: [ internal use only ]
3704 *
3705 * Asserts that the given entry ends at or before
3706 * the specified address; if necessary,
3707 * it splits the entry into two.
3708 */
3709 void
3710 vm_map_clip_end(
3711 vm_map_t map,
3712 vm_map_entry_t entry,
3713 vm_map_offset_t endaddr)
3714 {
3715 if (endaddr > entry->vme_end) {
3716 /*
3717 * Within the scope of this clipping, limit "endaddr" to
3718 * the end of this map entry...
3719 */
3720 endaddr = entry->vme_end;
3721 }
3722 #ifndef NO_NESTED_PMAP
3723 if (entry->is_sub_map && entry->use_pmap) {
3724 vm_map_offset_t start_unnest, end_unnest;
3725
3726 /*
3727 * Make sure the range between the start of this entry and
3728 * the new "endaddr" is no longer nested before we clip.
3729 * Unnest only the minimum range the platform can handle.
3730 * vm_map_clip_unnest may perform additional adjustments to
3731 * the unnest range.
3732 */
3733 start_unnest = entry->vme_start;
3734 end_unnest =
3735 (endaddr + pmap_nesting_size_min - 1) &
3736 ~(pmap_nesting_size_min - 1);
3737 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
3738 }
3739 #endif /* NO_NESTED_PMAP */
3740 if (endaddr < entry->vme_end) {
3741 if (entry->object.vm_object &&
3742 !entry->is_sub_map &&
3743 entry->object.vm_object->phys_contiguous) {
3744 pmap_remove(map->pmap,
3745 (addr64_t)(entry->vme_start),
3746 (addr64_t)(entry->vme_end));
3747 }
3748 _vm_map_clip_end(&map->hdr, entry, endaddr);
3749 vm_map_store_update_first_free(map, map->first_free);
3750 }
3751 }
3752
3753
3754 #define vm_map_copy_clip_end(copy, entry, endaddr) \
3755 MACRO_BEGIN \
3756 if ((endaddr) < (entry)->vme_end) \
3757 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
3758 MACRO_END
3759
3760 /*
3761 * This routine is called only when it is known that
3762 * the entry must be split.
3763 */
3764 static void
3765 _vm_map_clip_end(
3766 register struct vm_map_header *map_header,
3767 register vm_map_entry_t entry,
3768 register vm_map_offset_t end)
3769 {
3770 register vm_map_entry_t new_entry;
3771
3772 /*
3773 * Create a new entry and insert it
3774 * AFTER the specified entry
3775 */
3776
3777 if (entry->map_aligned) {
3778 assert(VM_MAP_PAGE_ALIGNED(end,
3779 VM_MAP_HDR_PAGE_MASK(map_header)));
3780 }
3781
3782 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
3783 vm_map_entry_copy_full(new_entry, entry);
3784
3785 assert(entry->vme_start < end);
3786 new_entry->vme_start = entry->vme_end = end;
3787 new_entry->offset += (end - entry->vme_start);
3788 assert(new_entry->vme_start < new_entry->vme_end);
3789
3790 _vm_map_store_entry_link(map_header, entry, new_entry);
3791
3792 if (entry->is_sub_map)
3793 vm_map_reference(new_entry->object.sub_map);
3794 else
3795 vm_object_reference(new_entry->object.vm_object);
3796 }
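/*
 * Worked example with hypothetical addresses: clipping an entry that
 * covers [0x1000, 0x5000) at end == 0x3000 trims the original entry
 * to [0x1000, 0x3000) and inserts a new entry for [0x3000, 0x5000)
 * AFTER it, with the new entry's object offset advanced by 0x2000;
 * the shared object or submap gains an extra reference, as above.
 */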
3797
3798
3799 /*
3800 * VM_MAP_RANGE_CHECK: [ internal use only ]
3801 *
3802 * Asserts that the starting and ending region
3803 * addresses fall within the valid range of the map.
3804 */
3805 #define VM_MAP_RANGE_CHECK(map, start, end) \
3806 MACRO_BEGIN \
3807 if (start < vm_map_min(map)) \
3808 start = vm_map_min(map); \
3809 if (end > vm_map_max(map)) \
3810 end = vm_map_max(map); \
3811 if (start > end) \
3812 start = end; \
3813 MACRO_END
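/*
 * Illustrative note with hypothetical values: for a map spanning
 * [0x1000, 0x100000), VM_MAP_RANGE_CHECK(map, 0x0, 0x200000) clamps
 * the pair to (0x1000, 0x100000), while an inverted request such as
 * (0x5000, 0x2000) degenerates to the empty range (0x2000, 0x2000).
 */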
3814
3815 /*
3816 * vm_map_range_check: [ internal use only ]
3817 *
3818 * Check that the region defined by the specified start and
3819 * end addresses is wholly contained within a single map
3820 * entry or set of adjacent map entries of the specified map,
3821 * i.e. the specified region contains no unmapped space.
3822 * If any or all of the region is unmapped, FALSE is returned.
3823 * Otherwise, TRUE is returned and if the output argument 'entry'
3824 * is not NULL it points to the map entry containing the start
3825 * of the region.
3826 *
3827 * The map is locked for reading on entry and is left locked.
3828 */
3829 static boolean_t
3830 vm_map_range_check(
3831 register vm_map_t map,
3832 register vm_map_offset_t start,
3833 register vm_map_offset_t end,
3834 vm_map_entry_t *entry)
3835 {
3836 vm_map_entry_t cur;
3837 register vm_map_offset_t prev;
3838
3839 /*
3840 * Basic sanity checks first
3841 */
3842 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
3843 return (FALSE);
3844
3845 /*
3846 * Check first if the region starts within a valid
3847 * mapping for the map.
3848 */
3849 if (!vm_map_lookup_entry(map, start, &cur))
3850 return (FALSE);
3851
3852 /*
3853 * Optimize for the case that the region is contained
3854 * in a single map entry.
3855 */
3856 if (entry != (vm_map_entry_t *) NULL)
3857 *entry = cur;
3858 if (end <= cur->vme_end)
3859 return (TRUE);
3860
3861 /*
3862 * If the region is not wholly contained within a
3863 * single entry, walk the entries looking for holes.
3864 */
3865 prev = cur->vme_end;
3866 cur = cur->vme_next;
3867 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
3868 if (end <= cur->vme_end)
3869 return (TRUE);
3870 prev = cur->vme_end;
3871 cur = cur->vme_next;
3872 }
3873 return (FALSE);
3874 }
3875
3876 /*
3877 * vm_map_submap: [ kernel use only ]
3878 *
3879 * Mark the given range as handled by a subordinate map.
3880 *
3881 * This range must have been created with vm_map_find using
3882 * the vm_submap_object, and no other operations may have been
3883 * performed on this range prior to calling vm_map_submap.
3884 *
3885 * Only a limited number of operations can be performed
3886 * within this range after calling vm_map_submap:
3887 * vm_fault
3888 * [Don't try vm_map_copyin!]
3889 *
3890 * To remove a submapping, one must first remove the
3891 * range from the superior map, and then destroy the
3892 * submap (if desired). [Better yet, don't try it.]
3893 */
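/*
 * Call sketch (illustrative): once the range has been reserved as described
 * above, nesting a previously created submap looks like:
 *
 *	kr = vm_map_submap(parent_map, start, end, child_map,
 *			   (vm_map_offset_t)0, TRUE);
 *
 * "parent_map" and "child_map" are hypothetical names; passing TRUE for
 * use_pmap requests pmap nesting where the platform supports it.
 */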
3894 kern_return_t
3895 vm_map_submap(
3896 vm_map_t map,
3897 vm_map_offset_t start,
3898 vm_map_offset_t end,
3899 vm_map_t submap,
3900 vm_map_offset_t offset,
3901 #ifdef NO_NESTED_PMAP
3902 __unused
3903 #endif /* NO_NESTED_PMAP */
3904 boolean_t use_pmap)
3905 {
3906 vm_map_entry_t entry;
3907 register kern_return_t result = KERN_INVALID_ARGUMENT;
3908 register vm_object_t object;
3909
3910 vm_map_lock(map);
3911
3912 if (! vm_map_lookup_entry(map, start, &entry)) {
3913 entry = entry->vme_next;
3914 }
3915
3916 if (entry == vm_map_to_entry(map) ||
3917 entry->is_sub_map) {
3918 vm_map_unlock(map);
3919 return KERN_INVALID_ARGUMENT;
3920 }
3921
3922 vm_map_clip_start(map, entry, start);
3923 vm_map_clip_end(map, entry, end);
3924
3925 if ((entry->vme_start == start) && (entry->vme_end == end) &&
3926 (!entry->is_sub_map) &&
3927 ((object = entry->object.vm_object) == vm_submap_object) &&
3928 (object->resident_page_count == 0) &&
3929 (object->copy == VM_OBJECT_NULL) &&
3930 (object->shadow == VM_OBJECT_NULL) &&
3931 (!object->pager_created)) {
3932 entry->offset = (vm_object_offset_t)offset;
3933 entry->object.vm_object = VM_OBJECT_NULL;
3934 vm_object_deallocate(object);
3935 entry->is_sub_map = TRUE;
3936 entry->use_pmap = FALSE;
3937 entry->object.sub_map = submap;
3938 vm_map_reference(submap);
3939 if (submap->mapped_in_other_pmaps == FALSE &&
3940 vm_map_pmap(submap) != PMAP_NULL &&
3941 vm_map_pmap(submap) != vm_map_pmap(map)) {
3942 /*
3943 * This submap is being mapped in a map
3944 * that uses a different pmap.
3945 * Set its "mapped_in_other_pmaps" flag
3946 * to indicate that we now need to
3947 * remove mappings from all pmaps rather
3948 * than just the submap's pmap.
3949 */
3950 submap->mapped_in_other_pmaps = TRUE;
3951 }
3952
3953 #ifndef NO_NESTED_PMAP
3954 if (use_pmap) {
3955 /* nest if platform code will allow */
3956 if(submap->pmap == NULL) {
3957 ledger_t ledger = map->pmap->ledger;
3958 submap->pmap = pmap_create(ledger,
3959 (vm_map_size_t) 0, FALSE);
3960 if(submap->pmap == PMAP_NULL) {
3961 vm_map_unlock(map);
3962 return(KERN_NO_SPACE);
3963 }
3964 }
3965 result = pmap_nest(map->pmap,
3966 (entry->object.sub_map)->pmap,
3967 (addr64_t)start,
3968 (addr64_t)start,
3969 (uint64_t)(end - start));
3970 if(result)
3971 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
3972 entry->use_pmap = TRUE;
3973 }
3974 #else /* NO_NESTED_PMAP */
3975 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
3976 #endif /* NO_NESTED_PMAP */
3977 result = KERN_SUCCESS;
3978 }
3979 vm_map_unlock(map);
3980
3981 return(result);
3982 }
3983
3984 /*
3985 * vm_map_protect:
3986 *
3987 * Sets the protection of the specified address
3988 * region in the target map. If "set_max" is
3989 * specified, the maximum protection is to be set;
3990 * otherwise, only the current protection is affected.
3991 */
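/*
 * Usage sketch (illustrative): make a range read-only without touching its
 * maximum protection:
 *
 *	kr = vm_map_protect(map, start, end, VM_PROT_READ, FALSE);
 *
 * With set_max == TRUE the new value becomes the maximum protection and the
 * current protection is intersected with its old value.
 */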
3992 kern_return_t
3993 vm_map_protect(
3994 register vm_map_t map,
3995 register vm_map_offset_t start,
3996 register vm_map_offset_t end,
3997 register vm_prot_t new_prot,
3998 register boolean_t set_max)
3999 {
4000 register vm_map_entry_t current;
4001 register vm_map_offset_t prev;
4002 vm_map_entry_t entry;
4003 vm_prot_t new_max;
4004
4005 XPR(XPR_VM_MAP,
4006 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
4007 map, start, end, new_prot, set_max);
4008
4009 vm_map_lock(map);
4010
4011 /* LP64todo - remove this check when vm_map_commpage64()
4012 * no longer has to stuff in a map_entry for the commpage
4013 * above the map's max_offset.
4014 */
4015 if (start >= map->max_offset) {
4016 vm_map_unlock(map);
4017 return(KERN_INVALID_ADDRESS);
4018 }
4019
4020 while(1) {
4021 /*
4022 * Lookup the entry. If it doesn't start in a valid
4023 * entry, return an error.
4024 */
4025 if (! vm_map_lookup_entry(map, start, &entry)) {
4026 vm_map_unlock(map);
4027 return(KERN_INVALID_ADDRESS);
4028 }
4029
4030 if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
4031 start = SUPERPAGE_ROUND_DOWN(start);
4032 continue;
4033 }
4034 break;
4035 }
4036 if (entry->superpage_size)
4037 end = SUPERPAGE_ROUND_UP(end);
4038
4039 /*
4040 * Make a first pass to check for protection and address
4041 * violations.
4042 */
4043
4044 current = entry;
4045 prev = current->vme_start;
4046 while ((current != vm_map_to_entry(map)) &&
4047 (current->vme_start < end)) {
4048
4049 /*
4050 * If there is a hole, return an error.
4051 */
4052 if (current->vme_start != prev) {
4053 vm_map_unlock(map);
4054 return(KERN_INVALID_ADDRESS);
4055 }
4056
4057 new_max = current->max_protection;
4058 if(new_prot & VM_PROT_COPY) {
4059 new_max |= VM_PROT_WRITE;
4060 if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) {
4061 vm_map_unlock(map);
4062 return(KERN_PROTECTION_FAILURE);
4063 }
4064 } else {
4065 if ((new_prot & new_max) != new_prot) {
4066 vm_map_unlock(map);
4067 return(KERN_PROTECTION_FAILURE);
4068 }
4069 }
4070
4071
4072 prev = current->vme_end;
4073 current = current->vme_next;
4074 }
4075 if (end > prev) {
4076 vm_map_unlock(map);
4077 return(KERN_INVALID_ADDRESS);
4078 }
4079
4080 /*
4081 * Go back and fix up protections.
4082 * Clip to start here if the range starts within
4083 * the entry.
4084 */
4085
4086 current = entry;
4087 if (current != vm_map_to_entry(map)) {
4088 /* clip and unnest if necessary */
4089 vm_map_clip_start(map, current, start);
4090 }
4091
4092 while ((current != vm_map_to_entry(map)) &&
4093 (current->vme_start < end)) {
4094
4095 vm_prot_t old_prot;
4096
4097 vm_map_clip_end(map, current, end);
4098
4099 if (current->is_sub_map) {
4100 /* clipping did unnest if needed */
4101 assert(!current->use_pmap);
4102 }
4103
4104 old_prot = current->protection;
4105
4106 if(new_prot & VM_PROT_COPY) {
4107 /* The caller is asking specifically to copy the */
4108 /* mapped data; this implies that max protection */
4109 /* will include write. Caller must be prepared */
4110 /* for loss of shared memory communication in the */
4111 /* target area after taking this step */
4112
4113 if (current->is_sub_map == FALSE && current->object.vm_object == VM_OBJECT_NULL){
4114 current->object.vm_object = vm_object_allocate((vm_map_size_t)(current->vme_end - current->vme_start));
4115 current->offset = 0;
4116 assert(current->use_pmap);
4117 }
4118 current->needs_copy = TRUE;
4119 current->max_protection |= VM_PROT_WRITE;
4120 }
4121
4122 if (set_max)
4123 current->protection =
4124 (current->max_protection =
4125 new_prot & ~VM_PROT_COPY) &
4126 old_prot;
4127 else
4128 current->protection = new_prot & ~VM_PROT_COPY;
4129
4130 /*
4131 * Update physical map if necessary.
4132 * If the request is to turn off write protection,
4133 * we won't do it for real (in pmap). This is because
4134 * it would cause copy-on-write to fail. We've already
4135 * set the new protection in the map, so if a
4136 * write-protect fault occurs, it will be fixed up
4137 * properly, COW or not.
4138 */
4139 if (current->protection != old_prot) {
4140 /* Look one level in: we support nested pmaps */
4141 /* from mapped submaps which are direct entries */
4142 /* in our map */
4143
4144 vm_prot_t prot;
4145
4146 prot = current->protection & ~VM_PROT_WRITE;
4147
4148 if (override_nx(map, current->alias) && prot)
4149 prot |= VM_PROT_EXECUTE;
4150
4151 if (current->is_sub_map && current->use_pmap) {
4152 pmap_protect(current->object.sub_map->pmap,
4153 current->vme_start,
4154 current->vme_end,
4155 prot);
4156 } else {
4157 pmap_protect(map->pmap,
4158 current->vme_start,
4159 current->vme_end,
4160 prot);
4161 }
4162 }
4163 current = current->vme_next;
4164 }
4165
4166 current = entry;
4167 while ((current != vm_map_to_entry(map)) &&
4168 (current->vme_start <= end)) {
4169 vm_map_simplify_entry(map, current);
4170 current = current->vme_next;
4171 }
4172
4173 vm_map_unlock(map);
4174 return(KERN_SUCCESS);
4175 }
4176
4177 /*
4178 * vm_map_inherit:
4179 *
4180 * Sets the inheritance of the specified address
4181 * range in the target map. Inheritance
4182 * affects how the map will be shared with
4183 * child maps at the time of vm_map_fork.
4184 */
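/*
 * Usage sketch (illustrative): mark a range to be shared with children
 * created by vm_map_fork():
 *
 *	kr = vm_map_inherit(map, start, end, VM_INHERIT_SHARE);
 *
 * Note that VM_INHERIT_COPY is rejected below for ranges backed by submaps.
 */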
4185 kern_return_t
4186 vm_map_inherit(
4187 register vm_map_t map,
4188 register vm_map_offset_t start,
4189 register vm_map_offset_t end,
4190 register vm_inherit_t new_inheritance)
4191 {
4192 register vm_map_entry_t entry;
4193 vm_map_entry_t temp_entry;
4194
4195 vm_map_lock(map);
4196
4197 VM_MAP_RANGE_CHECK(map, start, end);
4198
4199 if (vm_map_lookup_entry(map, start, &temp_entry)) {
4200 entry = temp_entry;
4201 }
4202 else {
4203 temp_entry = temp_entry->vme_next;
4204 entry = temp_entry;
4205 }
4206
4207 /* first check entire range for submaps which can't support the */
4208 /* given inheritance. */
4209 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4210 if(entry->is_sub_map) {
4211 if(new_inheritance == VM_INHERIT_COPY) {
4212 vm_map_unlock(map);
4213 return(KERN_INVALID_ARGUMENT);
4214 }
4215 }
4216
4217 entry = entry->vme_next;
4218 }
4219
4220 entry = temp_entry;
4221 if (entry != vm_map_to_entry(map)) {
4222 /* clip and unnest if necessary */
4223 vm_map_clip_start(map, entry, start);
4224 }
4225
4226 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
4227 vm_map_clip_end(map, entry, end);
4228 if (entry->is_sub_map) {
4229 /* clip did unnest if needed */
4230 assert(!entry->use_pmap);
4231 }
4232
4233 entry->inheritance = new_inheritance;
4234
4235 entry = entry->vme_next;
4236 }
4237
4238 vm_map_unlock(map);
4239 return(KERN_SUCCESS);
4240 }
4241
4242 /*
4243 * Update the accounting for the amount of wired memory in this map. If the user has
4244 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
4245 */
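/*
 * Illustrative example of the per-map check below (numbers invented):
 * with map->user_wire_limit = 64 MB, vm_user_wire_limit = 1 GB and
 * map->user_wire_size already at 60 MB, a request to wire another 8 MB
 * fails with KERN_RESOURCE_SHORTAGE because 68 MB > MIN(64 MB, 1 GB).
 */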
4246
4247 static kern_return_t
4248 add_wire_counts(
4249 vm_map_t map,
4250 vm_map_entry_t entry,
4251 boolean_t user_wire)
4252 {
4253 vm_map_size_t size;
4254
4255 if (user_wire) {
4256 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
4257
4258 /*
4259 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
4260 * this map entry.
4261 */
4262
4263 if (entry->user_wired_count == 0) {
4264 size = entry->vme_end - entry->vme_start;
4265
4266 /*
4267 * Since this is the first time the user is wiring this map entry, check to see if we're
4268 * exceeding the user wire limits. There is a per map limit which is the smaller of either
4269 * the process's rlimit or the global vm_user_wire_limit which caps this value. There is also
4270 * a system-wide limit on the amount of memory all users can wire. If the user is over either
4271 * limit, then we fail.
4272 */
4273
4274 if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
4275 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
4276 size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
4277 return KERN_RESOURCE_SHORTAGE;
4278
4279 /*
4280 * The first time the user wires an entry, we also increment the wired_count and add this to
4281 * the total that has been wired in the map.
4282 */
4283
4284 if (entry->wired_count >= MAX_WIRE_COUNT)
4285 return KERN_FAILURE;
4286
4287 entry->wired_count++;
4288 map->user_wire_size += size;
4289 }
4290
4291 if (entry->user_wired_count >= MAX_WIRE_COUNT)
4292 return KERN_FAILURE;
4293
4294 entry->user_wired_count++;
4295
4296 } else {
4297
4298 /*
4299 * The kernel's wiring the memory. Just bump the count and continue.
4300 */
4301
4302 if (entry->wired_count >= MAX_WIRE_COUNT)
4303 panic("vm_map_wire: too many wirings");
4304
4305 entry->wired_count++;
4306 }
4307
4308 return KERN_SUCCESS;
4309 }
4310
4311 /*
4312 * Update the memory wiring accounting now that the given map entry is being unwired.
4313 */
4314
4315 static void
4316 subtract_wire_counts(
4317 vm_map_t map,
4318 vm_map_entry_t entry,
4319 boolean_t user_wire)
4320 {
4321
4322 if (user_wire) {
4323
4324 /*
4325 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
4326 */
4327
4328 if (entry->user_wired_count == 1) {
4329
4330 /*
4331 * We're removing the last user wire reference. Decrement the wired_count and the total
4332 * user wired memory for this map.
4333 */
4334
4335 assert(entry->wired_count >= 1);
4336 entry->wired_count--;
4337 map->user_wire_size -= entry->vme_end - entry->vme_start;
4338 }
4339
4340 assert(entry->user_wired_count >= 1);
4341 entry->user_wired_count--;
4342
4343 } else {
4344
4345 /*
4346 * The kernel is unwiring the memory. Just update the count.
4347 */
4348
4349 assert(entry->wired_count >= 1);
4350 entry->wired_count--;
4351 }
4352 }
4353
4354 /*
4355 * vm_map_wire:
4356 *
4357 * Sets the pageability of the specified address range in the
4358 * target map as wired. Regions specified as not pageable require
4359 * locked-down physical memory and physical page maps. The
4360 * access_type variable indicates types of accesses that must not
4361 * generate page faults. This is checked against protection of
4362 * memory being locked-down.
4363 *
4364 * The map must not be locked, but a reference must remain to the
4365 * map throughout the call.
4366 */
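/*
 * Typical call (an illustrative sketch): wire a page-aligned range for
 * read/write access on behalf of the user, so it is charged against the
 * user wire limits:
 *
 *	kr = vm_map_wire(map, start, end,
 *			 VM_PROT_READ | VM_PROT_WRITE, TRUE);
 *
 * The _nested variant below additionally threads a target pmap and base
 * address through recursive calls into submaps.
 */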
4367 static kern_return_t
4368 vm_map_wire_nested(
4369 register vm_map_t map,
4370 register vm_map_offset_t start,
4371 register vm_map_offset_t end,
4372 register vm_prot_t access_type,
4373 boolean_t user_wire,
4374 pmap_t map_pmap,
4375 vm_map_offset_t pmap_addr,
4376 ppnum_t *physpage_p)
4377 {
4378 register vm_map_entry_t entry;
4379 struct vm_map_entry *first_entry, tmp_entry;
4380 vm_map_t real_map;
4381 register vm_map_offset_t s,e;
4382 kern_return_t rc;
4383 boolean_t need_wakeup;
4384 boolean_t main_map = FALSE;
4385 wait_interrupt_t interruptible_state;
4386 thread_t cur_thread;
4387 unsigned int last_timestamp;
4388 vm_map_size_t size;
4389 boolean_t wire_and_extract;
4390
4391 wire_and_extract = FALSE;
4392 if (physpage_p != NULL) {
4393 /*
4394 * The caller wants the physical page number of the
4395 * wired page. We return only one physical page number
4396 * so this works for only one page at a time.
4397 */
4398 if ((end - start) != PAGE_SIZE) {
4399 return KERN_INVALID_ARGUMENT;
4400 }
4401 wire_and_extract = TRUE;
4402 *physpage_p = 0;
4403 }
4404
4405 vm_map_lock(map);
4406 if(map_pmap == NULL)
4407 main_map = TRUE;
4408 last_timestamp = map->timestamp;
4409
4410 VM_MAP_RANGE_CHECK(map, start, end);
4411 assert(page_aligned(start));
4412 assert(page_aligned(end));
4413 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
4414 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
4415 if (start == end) {
4416 /* We wired what the caller asked for, zero pages */
4417 vm_map_unlock(map);
4418 return KERN_SUCCESS;
4419 }
4420
4421 need_wakeup = FALSE;
4422 cur_thread = current_thread();
4423
4424 s = start;
4425 rc = KERN_SUCCESS;
4426
4427 if (vm_map_lookup_entry(map, s, &first_entry)) {
4428 entry = first_entry;
4429 /*
4430 * vm_map_clip_start will be done later.
4431 * We don't want to unnest any nested submaps here !
4432 */
4433 } else {
4434 /* Start address is not in map */
4435 rc = KERN_INVALID_ADDRESS;
4436 goto done;
4437 }
4438
4439 while ((entry != vm_map_to_entry(map)) && (s < end)) {
4440 /*
4441 * At this point, we have wired from "start" to "s".
4442 * We still need to wire from "s" to "end".
4443 *
4444 * "entry" hasn't been clipped, so it could start before "s"
4445 * and/or end after "end".
4446 */
4447
4448 /* "e" is how far we want to wire in this entry */
4449 e = entry->vme_end;
4450 if (e > end)
4451 e = end;
4452
4453 /*
4454 * If another thread is wiring/unwiring this entry then
4455 * block after informing the other thread to wake us up.
4456 */
4457 if (entry->in_transition) {
4458 wait_result_t wait_result;
4459
4460 /*
4461 * We have not clipped the entry. Make sure that
4462 * the start address is in range so that the lookup
4463 * below will succeed.
4464 * "s" is the current starting point: we've already
4465 * wired from "start" to "s" and we still have
4466 * to wire from "s" to "end".
4467 */
4468
4469 entry->needs_wakeup = TRUE;
4470
4471 /*
4472 * wake up anybody waiting on entries that we have
4473 * already wired.
4474 */
4475 if (need_wakeup) {
4476 vm_map_entry_wakeup(map);
4477 need_wakeup = FALSE;
4478 }
4479 /*
4480 * User wiring is interruptible
4481 */
4482 wait_result = vm_map_entry_wait(map,
4483 (user_wire) ? THREAD_ABORTSAFE :
4484 THREAD_UNINT);
4485 if (user_wire && wait_result == THREAD_INTERRUPTED) {
4486 /*
4487 * undo the wirings we have done so far
4488 * We do not clear the needs_wakeup flag,
4489 * because we cannot tell if we were the
4490 * only one waiting.
4491 */
4492 rc = KERN_FAILURE;
4493 goto done;
4494 }
4495
4496 /*
4497 * Cannot avoid a lookup here. Reset the timestamp.
4498 */
4499 last_timestamp = map->timestamp;
4500
4501 /*
4502 * The entry could have been clipped, look it up again.
4503 * The worst that can happen is that it no longer exists.
4504 */
4505 if (!vm_map_lookup_entry(map, s, &first_entry)) {
4506 /*
4507 * User: undo everything up to the previous
4508 * entry. Let vm_map_unwire worry about
4509 * checking the validity of the range.
4510 */
4511 rc = KERN_FAILURE;
4512 goto done;
4513 }
4514 entry = first_entry;
4515 continue;
4516 }
4517
4518 if (entry->is_sub_map) {
4519 vm_map_offset_t sub_start;
4520 vm_map_offset_t sub_end;
4521 vm_map_offset_t local_start;
4522 vm_map_offset_t local_end;
4523 pmap_t pmap;
4524
4525 if (wire_and_extract) {
4526 /*
4527 * Wiring would result in copy-on-write
4528 * which would not be compatible with
4529 * the sharing we have with the original
4530 * provider of this memory.
4531 */
4532 rc = KERN_INVALID_ARGUMENT;
4533 goto done;
4534 }
4535
4536 vm_map_clip_start(map, entry, s);
4537 vm_map_clip_end(map, entry, end);
4538
4539 sub_start = entry->offset;
4540 sub_end = entry->vme_end;
4541 sub_end += entry->offset - entry->vme_start;
4542
4543 local_end = entry->vme_end;
4544 if(map_pmap == NULL) {
4545 vm_object_t object;
4546 vm_object_offset_t offset;
4547 vm_prot_t prot;
4548 boolean_t wired;
4549 vm_map_entry_t local_entry;
4550 vm_map_version_t version;
4551 vm_map_t lookup_map;
4552
4553 if(entry->use_pmap) {
4554 pmap = entry->object.sub_map->pmap;
4555 /* ppc implementation requires that */
4556 /* submaps pmap address ranges line */
4557 /* up with parent map */
4558 #ifdef notdef
4559 pmap_addr = sub_start;
4560 #endif
4561 pmap_addr = s;
4562 } else {
4563 pmap = map->pmap;
4564 pmap_addr = s;
4565 }
4566
4567 if (entry->wired_count) {
4568 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4569 goto done;
4570
4571 /*
4572 * The map was not unlocked:
4573 * no need to goto re-lookup.
4574 * Just go directly to next entry.
4575 */
4576 entry = entry->vme_next;
4577 s = entry->vme_start;
4578 continue;
4579
4580 }
4581
4582 /* call vm_map_lookup_locked to */
4583 /* cause any needs copy to be */
4584 /* evaluated */
4585 local_start = entry->vme_start;
4586 lookup_map = map;
4587 vm_map_lock_write_to_read(map);
4588 if(vm_map_lookup_locked(
4589 &lookup_map, local_start,
4590 access_type,
4591 OBJECT_LOCK_EXCLUSIVE,
4592 &version, &object,
4593 &offset, &prot, &wired,
4594 NULL,
4595 &real_map)) {
4596
4597 vm_map_unlock_read(lookup_map);
4598 vm_map_unwire(map, start,
4599 s, user_wire);
4600 return(KERN_FAILURE);
4601 }
4602 vm_object_unlock(object);
4603 if(real_map != lookup_map)
4604 vm_map_unlock(real_map);
4605 vm_map_unlock_read(lookup_map);
4606 vm_map_lock(map);
4607
4608 /* we unlocked, so must re-lookup */
4609 if (!vm_map_lookup_entry(map,
4610 local_start,
4611 &local_entry)) {
4612 rc = KERN_FAILURE;
4613 goto done;
4614 }
4615
4616 /*
4617 * entry could have been "simplified",
4618 * so re-clip
4619 */
4620 entry = local_entry;
4621 assert(s == local_start);
4622 vm_map_clip_start(map, entry, s);
4623 vm_map_clip_end(map, entry, end);
4624 /* re-compute "e" */
4625 e = entry->vme_end;
4626 if (e > end)
4627 e = end;
4628
4629 /* did we have a change of type? */
4630 if (!entry->is_sub_map) {
4631 last_timestamp = map->timestamp;
4632 continue;
4633 }
4634 } else {
4635 local_start = entry->vme_start;
4636 pmap = map_pmap;
4637 }
4638
4639 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4640 goto done;
4641
4642 entry->in_transition = TRUE;
4643
4644 vm_map_unlock(map);
4645 rc = vm_map_wire_nested(entry->object.sub_map,
4646 sub_start, sub_end,
4647 access_type,
4648 user_wire, pmap, pmap_addr,
4649 NULL);
4650 vm_map_lock(map);
4651
4652 /*
4653 * Find the entry again. It could have been clipped
4654 * after we unlocked the map.
4655 */
4656 if (!vm_map_lookup_entry(map, local_start,
4657 &first_entry))
4658 panic("vm_map_wire: re-lookup failed");
4659 entry = first_entry;
4660
4661 assert(local_start == s);
4662 /* re-compute "e" */
4663 e = entry->vme_end;
4664 if (e > end)
4665 e = end;
4666
4667 last_timestamp = map->timestamp;
4668 while ((entry != vm_map_to_entry(map)) &&
4669 (entry->vme_start < e)) {
4670 assert(entry->in_transition);
4671 entry->in_transition = FALSE;
4672 if (entry->needs_wakeup) {
4673 entry->needs_wakeup = FALSE;
4674 need_wakeup = TRUE;
4675 }
4676 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
4677 subtract_wire_counts(map, entry, user_wire);
4678 }
4679 entry = entry->vme_next;
4680 }
4681 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
4682 goto done;
4683 }
4684
4685 /* no need to relookup again */
4686 s = entry->vme_start;
4687 continue;
4688 }
4689
4690 /*
4691 * If this entry is already wired then increment
4692 * the appropriate wire reference count.
4693 */
4694 if (entry->wired_count) {
4695
4696 if ((entry->protection & access_type) != access_type) {
4697 /* found a protection problem */
4698
4699 /*
4700 * XXX FBDP
4701 * We should always return an error
4702 * in this case but since we didn't
4703 * enforce it before, let's do
4704 * it only for the new "wire_and_extract"
4705 * code path for now...
4706 */
4707 if (wire_and_extract) {
4708 rc = KERN_PROTECTION_FAILURE;
4709 goto done;
4710 }
4711 }
4712
4713 /*
4714 * entry is already wired down, get our reference
4715 * after clipping to our range.
4716 */
4717 vm_map_clip_start(map, entry, s);
4718 vm_map_clip_end(map, entry, end);
4719
4720 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4721 goto done;
4722
4723 if (wire_and_extract) {
4724 vm_object_t object;
4725 vm_object_offset_t offset;
4726 vm_page_t m;
4727
4728 /*
4729 * We don't have to "wire" the page again
4730 * but we still have to "extract" its
4731 * physical page number, after some sanity
4732 * checks.
4733 */
4734 assert((entry->vme_end - entry->vme_start)
4735 == PAGE_SIZE);
4736 assert(!entry->needs_copy);
4737 assert(!entry->is_sub_map);
4738 assert(entry->object.vm_object);
4739 if (((entry->vme_end - entry->vme_start)
4740 != PAGE_SIZE) ||
4741 entry->needs_copy ||
4742 entry->is_sub_map ||
4743 entry->object.vm_object == VM_OBJECT_NULL) {
4744 rc = KERN_INVALID_ARGUMENT;
4745 goto done;
4746 }
4747
4748 object = entry->object.vm_object;
4749 offset = entry->offset;
4750 /* need exclusive lock to update m->dirty */
4751 if (entry->protection & VM_PROT_WRITE) {
4752 vm_object_lock(object);
4753 } else {
4754 vm_object_lock_shared(object);
4755 }
4756 m = vm_page_lookup(object, offset);
4757 assert(m != VM_PAGE_NULL);
4758 assert(m->wire_count);
4759 if (m != VM_PAGE_NULL && m->wire_count) {
4760 *physpage_p = m->phys_page;
4761 if (entry->protection & VM_PROT_WRITE) {
4762 vm_object_lock_assert_exclusive(
4763 m->object);
4764 m->dirty = TRUE;
4765 }
4766 } else {
4767 /* not already wired !? */
4768 *physpage_p = 0;
4769 }
4770 vm_object_unlock(object);
4771 }
4772
4773 /* map was not unlocked: no need to relookup */
4774 entry = entry->vme_next;
4775 s = entry->vme_start;
4776 continue;
4777 }
4778
4779 /*
4780 * Unwired entry or wire request transmitted via submap
4781 */
4782
4783
4784 /*
4785 * Perform actions of vm_map_lookup that need the write
4786 * lock on the map: create a shadow object for a
4787 * copy-on-write region, or an object for a zero-fill
4788 * region.
4789 */
4790 size = entry->vme_end - entry->vme_start;
4791 /*
4792 * If wiring a copy-on-write page, we need to copy it now
4793 * even if we're only (currently) requesting read access.
4794 * This is aggressive, but once it's wired we can't move it.
4795 */
4796 if (entry->needs_copy) {
4797 if (wire_and_extract) {
4798 /*
4799 * We're supposed to share with the original
4800 * provider so should not be "needs_copy"
4801 */
4802 rc = KERN_INVALID_ARGUMENT;
4803 goto done;
4804 }
4805
4806 vm_object_shadow(&entry->object.vm_object,
4807 &entry->offset, size);
4808 entry->needs_copy = FALSE;
4809 } else if (entry->object.vm_object == VM_OBJECT_NULL) {
4810 if (wire_and_extract) {
4811 /*
4812 * We're supposed to share with the original
4813 * provider so should already have an object.
4814 */
4815 rc = KERN_INVALID_ARGUMENT;
4816 goto done;
4817 }
4818 entry->object.vm_object = vm_object_allocate(size);
4819 entry->offset = (vm_object_offset_t)0;
4820 assert(entry->use_pmap);
4821 }
4822
4823 vm_map_clip_start(map, entry, s);
4824 vm_map_clip_end(map, entry, end);
4825
4826 /* re-compute "e" */
4827 e = entry->vme_end;
4828 if (e > end)
4829 e = end;
4830
4831 /*
4832 * Check for holes and protection mismatch.
4833 * Holes: Next entry should be contiguous unless this
4834 * is the end of the region.
4835 * Protection: Access requested must be allowed, unless
4836 * wiring is by protection class
4837 */
4838 if ((entry->vme_end < end) &&
4839 ((entry->vme_next == vm_map_to_entry(map)) ||
4840 (entry->vme_next->vme_start > entry->vme_end))) {
4841 /* found a hole */
4842 rc = KERN_INVALID_ADDRESS;
4843 goto done;
4844 }
4845 if ((entry->protection & access_type) != access_type) {
4846 /* found a protection problem */
4847 rc = KERN_PROTECTION_FAILURE;
4848 goto done;
4849 }
4850
4851 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
4852
4853 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
4854 goto done;
4855
4856 entry->in_transition = TRUE;
4857
4858 /*
4859 * This entry might get split once we unlock the map.
4860 * In vm_fault_wire(), we need the current range as
4861 * defined by this entry. In order for this to work
4862 * along with a simultaneous clip operation, we make a
4863 * temporary copy of this entry and use that for the
4864 * wiring. Note that the underlying objects do not
4865 * change during a clip.
4866 */
4867 tmp_entry = *entry;
4868
4869 /*
4870 * The in_transition state guarantees that the entry
4871 * (or entries for this range, if a split occurred) will be
4872 * there when the map lock is acquired for the second time.
4873 */
4874 vm_map_unlock(map);
4875
4876 if (!user_wire && cur_thread != THREAD_NULL)
4877 interruptible_state = thread_interrupt_level(THREAD_UNINT);
4878 else
4879 interruptible_state = THREAD_UNINT;
4880
4881 if(map_pmap)
4882 rc = vm_fault_wire(map,
4883 &tmp_entry, map_pmap, pmap_addr,
4884 physpage_p);
4885 else
4886 rc = vm_fault_wire(map,
4887 &tmp_entry, map->pmap,
4888 tmp_entry.vme_start,
4889 physpage_p);
4890
4891 if (!user_wire && cur_thread != THREAD_NULL)
4892 thread_interrupt_level(interruptible_state);
4893
4894 vm_map_lock(map);
4895
4896 if (last_timestamp+1 != map->timestamp) {
4897 /*
4898 * Find the entry again. It could have been clipped
4899 * after we unlocked the map.
4900 */
4901 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
4902 &first_entry))
4903 panic("vm_map_wire: re-lookup failed");
4904
4905 entry = first_entry;
4906 }
4907
4908 last_timestamp = map->timestamp;
4909
4910 while ((entry != vm_map_to_entry(map)) &&
4911 (entry->vme_start < tmp_entry.vme_end)) {
4912 assert(entry->in_transition);
4913 entry->in_transition = FALSE;
4914 if (entry->needs_wakeup) {
4915 entry->needs_wakeup = FALSE;
4916 need_wakeup = TRUE;
4917 }
4918 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
4919 subtract_wire_counts(map, entry, user_wire);
4920 }
4921 entry = entry->vme_next;
4922 }
4923
4924 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
4925 goto done;
4926 }
4927
4928 s = entry->vme_start;
4929 } /* end while loop through map entries */
4930
4931 done:
4932 if (rc == KERN_SUCCESS) {
4933 /* repair any damage we may have made to the VM map */
4934 vm_map_simplify_range(map, start, end);
4935 }
4936
4937 vm_map_unlock(map);
4938
4939 /*
4940 * wake up anybody waiting on entries we wired.
4941 */
4942 if (need_wakeup)
4943 vm_map_entry_wakeup(map);
4944
4945 if (rc != KERN_SUCCESS) {
4946 /* undo what has been wired so far */
4947 vm_map_unwire(map, start, s, user_wire);
4948 if (physpage_p) {
4949 *physpage_p = 0;
4950 }
4951 }
4952
4953 return rc;
4954
4955 }
4956
4957 kern_return_t
4958 vm_map_wire(
4959 register vm_map_t map,
4960 register vm_map_offset_t start,
4961 register vm_map_offset_t end,
4962 register vm_prot_t access_type,
4963 boolean_t user_wire)
4964 {
4965
4966 kern_return_t kret;
4967
4968 kret = vm_map_wire_nested(map, start, end, access_type,
4969 user_wire, (pmap_t)NULL, 0, NULL);
4970 return kret;
4971 }
4972
4973 kern_return_t
4974 vm_map_wire_and_extract(
4975 vm_map_t map,
4976 vm_map_offset_t start,
4977 vm_prot_t access_type,
4978 boolean_t user_wire,
4979 ppnum_t *physpage_p)
4980 {
4981
4982 kern_return_t kret;
4983
4984 kret = vm_map_wire_nested(map,
4985 start,
4986 start+VM_MAP_PAGE_SIZE(map),
4987 access_type,
4988 user_wire,
4989 (pmap_t)NULL,
4990 0,
4991 physpage_p);
4992 if (kret != KERN_SUCCESS &&
4993 physpage_p != NULL) {
4994 *physpage_p = 0;
4995 }
4996 return kret;
4997 }
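/*
 * Usage sketch (illustrative): wire exactly one VM page and retrieve the
 * physical page number backing it:
 *
 *	ppnum_t pn;
 *	kern_return_t kr;
 *
 *	kr = vm_map_wire_and_extract(map, addr,
 *				     VM_PROT_READ | VM_PROT_WRITE,
 *				     FALSE, &pn);
 *
 * On success "pn" holds the physical page number; on failure it is set
 * to 0, as is any physpage_p passed to the nested call above.
 */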
4998
4999 /*
5000 * vm_map_unwire:
5001 *
5002 * Sets the pageability of the specified address range in the target
5003 * as pageable. Regions specified must have been wired previously.
5004 *
5005 * The map must not be locked, but a reference must remain to the map
5006 * throughout the call.
5007 *
5008 * The kernel will panic on failures. User unwire ignores holes and
5009 * unwired or in-transition entries to avoid losing memory by leaving
5010 * it unwired.
5011 */
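/*
 * Usage sketch (illustrative): release a range previously wired with
 * vm_map_wire() on behalf of the user:
 *
 *	kr = vm_map_unwire(map, start, end, TRUE);
 */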
5012 static kern_return_t
5013 vm_map_unwire_nested(
5014 register vm_map_t map,
5015 register vm_map_offset_t start,
5016 register vm_map_offset_t end,
5017 boolean_t user_wire,
5018 pmap_t map_pmap,
5019 vm_map_offset_t pmap_addr)
5020 {
5021 register vm_map_entry_t entry;
5022 struct vm_map_entry *first_entry, tmp_entry;
5023 boolean_t need_wakeup;
5024 boolean_t main_map = FALSE;
5025 unsigned int last_timestamp;
5026
5027 vm_map_lock(map);
5028 if(map_pmap == NULL)
5029 main_map = TRUE;
5030 last_timestamp = map->timestamp;
5031
5032 VM_MAP_RANGE_CHECK(map, start, end);
5033 assert(page_aligned(start));
5034 assert(page_aligned(end));
5035 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
5036 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
5037
5038 if (start == end) {
5039 /* We unwired what the caller asked for: zero pages */
5040 vm_map_unlock(map);
5041 return KERN_SUCCESS;
5042 }
5043
5044 if (vm_map_lookup_entry(map, start, &first_entry)) {
5045 entry = first_entry;
5046 /*
5047 * vm_map_clip_start will be done later.
5048 * We don't want to unnest any nested sub maps here !
5049 */
5050 }
5051 else {
5052 if (!user_wire) {
5053 panic("vm_map_unwire: start not found");
5054 }
5055 /* Start address is not in map. */
5056 vm_map_unlock(map);
5057 return(KERN_INVALID_ADDRESS);
5058 }
5059
5060 if (entry->superpage_size) {
5061 /* superpages are always wired */
5062 vm_map_unlock(map);
5063 return KERN_INVALID_ADDRESS;
5064 }
5065
5066 need_wakeup = FALSE;
5067 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
5068 if (entry->in_transition) {
5069 /*
5070 * 1)
5071 * Another thread is wiring down this entry. Note
5072 * that if it were not for the other thread, we would
5073 * be unwiring an unwired entry. This is not
5074 * permitted. If we wait, we will be unwiring memory
5075 * we did not wire.
5076 *
5077 * 2)
5078 * Another thread is unwiring this entry. We did not
5079 * have a reference to it, because if we did, this
5080 * entry will not be getting unwired now.
5081 */
5082 if (!user_wire) {
5083 /*
5084 * XXX FBDP
5085 * This could happen: there could be some
5086 * overlapping vslock/vsunlock operations
5087 * going on.
5088 * We should probably just wait and retry,
5089 * but then we have to be careful that this
5090 * entry could get "simplified" after
5091 * "in_transition" gets unset and before
5092 * we re-lookup the entry, so we would
5093 * have to re-clip the entry to avoid
5094 * re-unwiring what we have already unwired...
5095 * See vm_map_wire_nested().
5096 *
5097 * Or we could just ignore "in_transition"
5098 * here and proceed to decrement the wired
5099 * count(s) on this entry. That should be fine
5100 * as long as "wired_count" doesn't drop all
5101 * the way to 0 (and we should panic if THAT
5102 * happens).
5103 */
5104 panic("vm_map_unwire: in_transition entry");
5105 }
5106
5107 entry = entry->vme_next;
5108 continue;
5109 }
5110
5111 if (entry->is_sub_map) {
5112 vm_map_offset_t sub_start;
5113 vm_map_offset_t sub_end;
5114 vm_map_offset_t local_end;
5115 pmap_t pmap;
5116
5117 vm_map_clip_start(map, entry, start);
5118 vm_map_clip_end(map, entry, end);
5119
5120 sub_start = entry->offset;
5121 sub_end = entry->vme_end - entry->vme_start;
5122 sub_end += entry->offset;
5123 local_end = entry->vme_end;
5124 if(map_pmap == NULL) {
5125 if(entry->use_pmap) {
5126 pmap = entry->object.sub_map->pmap;
5127 pmap_addr = sub_start;
5128 } else {
5129 pmap = map->pmap;
5130 pmap_addr = start;
5131 }
5132 if (entry->wired_count == 0 ||
5133 (user_wire && entry->user_wired_count == 0)) {
5134 if (!user_wire)
5135 panic("vm_map_unwire: entry is unwired");
5136 entry = entry->vme_next;
5137 continue;
5138 }
5139
5140 /*
5141 * Check for holes
5142 * Holes: Next entry should be contiguous unless
5143 * this is the end of the region.
5144 */
5145 if (((entry->vme_end < end) &&
5146 ((entry->vme_next == vm_map_to_entry(map)) ||
5147 (entry->vme_next->vme_start
5148 > entry->vme_end)))) {
5149 if (!user_wire)
5150 panic("vm_map_unwire: non-contiguous region");
5151 /*
5152 entry = entry->vme_next;
5153 continue;
5154 */
5155 }
5156
5157 subtract_wire_counts(map, entry, user_wire);
5158
5159 if (entry->wired_count != 0) {
5160 entry = entry->vme_next;
5161 continue;
5162 }
5163
5164 entry->in_transition = TRUE;
5165 tmp_entry = *entry;/* see comment in vm_map_wire() */
5166
5167 /*
5168 * We can unlock the map now. The in_transition state
5169 * guarantees the existence of the entry.
5170 */
5171 vm_map_unlock(map);
5172 vm_map_unwire_nested(entry->object.sub_map,
5173 sub_start, sub_end, user_wire, pmap, pmap_addr);
5174 vm_map_lock(map);
5175
5176 if (last_timestamp+1 != map->timestamp) {
5177 /*
5178 * Find the entry again. It could have been
5179 * clipped or deleted after we unlocked the map.
5180 */
5181 if (!vm_map_lookup_entry(map,
5182 tmp_entry.vme_start,
5183 &first_entry)) {
5184 if (!user_wire)
5185 panic("vm_map_unwire: re-lookup failed");
5186 entry = first_entry->vme_next;
5187 } else
5188 entry = first_entry;
5189 }
5190 last_timestamp = map->timestamp;
5191
5192 /*
5193 * clear transition bit for all constituent entries
5194 * that were in the original entry (saved in
5195 * tmp_entry). Also check for waiters.
5196 */
5197 while ((entry != vm_map_to_entry(map)) &&
5198 (entry->vme_start < tmp_entry.vme_end)) {
5199 assert(entry->in_transition);
5200 entry->in_transition = FALSE;
5201 if (entry->needs_wakeup) {
5202 entry->needs_wakeup = FALSE;
5203 need_wakeup = TRUE;
5204 }
5205 entry = entry->vme_next;
5206 }
5207 continue;
5208 } else {
5209 vm_map_unlock(map);
5210 vm_map_unwire_nested(entry->object.sub_map,
5211 sub_start, sub_end, user_wire, map_pmap,
5212 pmap_addr);
5213 vm_map_lock(map);
5214
5215 if (last_timestamp+1 != map->timestamp) {
5216 /*
5217 * Find the entry again. It could have been
5218 * clipped or deleted after we unlocked the map.
5219 */
5220 if (!vm_map_lookup_entry(map,
5221 tmp_entry.vme_start,
5222 &first_entry)) {
5223 if (!user_wire)
5224 panic("vm_map_unwire: re-lookup failed");
5225 entry = first_entry->vme_next;
5226 } else
5227 entry = first_entry;
5228 }
5229 last_timestamp = map->timestamp;
5230 }
5231 }
5232
5233
5234 if ((entry->wired_count == 0) ||
5235 (user_wire && entry->user_wired_count == 0)) {
5236 if (!user_wire)
5237 panic("vm_map_unwire: entry is unwired");
5238
5239 entry = entry->vme_next;
5240 continue;
5241 }
5242
5243 assert(entry->wired_count > 0 &&
5244 (!user_wire || entry->user_wired_count > 0));
5245
5246 vm_map_clip_start(map, entry, start);
5247 vm_map_clip_end(map, entry, end);
5248
5249 /*
5250 * Check for holes
5251 * Holes: Next entry should be contiguous unless
5252 * this is the end of the region.
5253 */
5254 if (((entry->vme_end < end) &&
5255 ((entry->vme_next == vm_map_to_entry(map)) ||
5256 (entry->vme_next->vme_start > entry->vme_end)))) {
5257
5258 if (!user_wire)
5259 panic("vm_map_unwire: non-contiguous region");
5260 entry = entry->vme_next;
5261 continue;
5262 }
5263
5264 subtract_wire_counts(map, entry, user_wire);
5265
5266 if (entry->wired_count != 0) {
5267 entry = entry->vme_next;
5268 continue;
5269 }
5270
5271 if(entry->zero_wired_pages) {
5272 entry->zero_wired_pages = FALSE;
5273 }
5274
5275 entry->in_transition = TRUE;
5276 tmp_entry = *entry; /* see comment in vm_map_wire() */
5277
5278 /*
5279 * We can unlock the map now. The in_transition state
5280 * guarantees the existence of the entry.
5281 */
5282 vm_map_unlock(map);
5283 if(map_pmap) {
5284 vm_fault_unwire(map,
5285 &tmp_entry, FALSE, map_pmap, pmap_addr);
5286 } else {
5287 vm_fault_unwire(map,
5288 &tmp_entry, FALSE, map->pmap,
5289 tmp_entry.vme_start);
5290 }
5291 vm_map_lock(map);
5292
5293 if (last_timestamp+1 != map->timestamp) {
5294 /*
5295 * Find the entry again. It could have been clipped
5296 * or deleted after we unlocked the map.
5297 */
5298 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
5299 &first_entry)) {
5300 if (!user_wire)
5301 panic("vm_map_unwire: re-lookup failed");
5302 entry = first_entry->vme_next;
5303 } else
5304 entry = first_entry;
5305 }
5306 last_timestamp = map->timestamp;
5307
5308 /*
5309 * clear transition bit for all constituent entries that
5310 * were in the original entry (saved in tmp_entry). Also
5311 * check for waiters.
5312 */
5313 while ((entry != vm_map_to_entry(map)) &&
5314 (entry->vme_start < tmp_entry.vme_end)) {
5315 assert(entry->in_transition);
5316 entry->in_transition = FALSE;
5317 if (entry->needs_wakeup) {
5318 entry->needs_wakeup = FALSE;
5319 need_wakeup = TRUE;
5320 }
5321 entry = entry->vme_next;
5322 }
5323 }
5324
5325 /*
5326 * We might have fragmented the address space when we wired this
5327 * range of addresses. Attempt to re-coalesce these VM map entries
5328 * with their neighbors now that they're no longer wired.
5329 * Under some circumstances, address space fragmentation can
5330 * prevent VM object shadow chain collapsing, which can cause
5331 * swap space leaks.
5332 */
5333 vm_map_simplify_range(map, start, end);
5334
5335 vm_map_unlock(map);
5336 /*
5337 * wake up anybody waiting on entries that we have unwired.
5338 */
5339 if (need_wakeup)
5340 vm_map_entry_wakeup(map);
5341 return(KERN_SUCCESS);
5342
5343 }
5344
5345 kern_return_t
5346 vm_map_unwire(
5347 register vm_map_t map,
5348 register vm_map_offset_t start,
5349 register vm_map_offset_t end,
5350 boolean_t user_wire)
5351 {
5352 return vm_map_unwire_nested(map, start, end,
5353 user_wire, (pmap_t)NULL, 0);
5354 }
5355
5356
5357 /*
5358 * vm_map_entry_delete: [ internal use only ]
5359 *
5360 * Deallocate the given entry from the target map.
5361 */
5362 static void
5363 vm_map_entry_delete(
5364 register vm_map_t map,
5365 register vm_map_entry_t entry)
5366 {
5367 register vm_map_offset_t s, e;
5368 register vm_object_t object;
5369 register vm_map_t submap;
5370
5371 s = entry->vme_start;
5372 e = entry->vme_end;
5373 assert(page_aligned(s));
5374 assert(page_aligned(e));
5375 if (entry->map_aligned == TRUE) {
5376 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
5377 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
5378 }
5379 assert(entry->wired_count == 0);
5380 assert(entry->user_wired_count == 0);
5381 assert(!entry->permanent);
5382
5383 if (entry->is_sub_map) {
5384 object = NULL;
5385 submap = entry->object.sub_map;
5386 } else {
5387 submap = NULL;
5388 object = entry->object.vm_object;
5389 }
5390
5391 vm_map_store_entry_unlink(map, entry);
5392 map->size -= e - s;
5393
5394 vm_map_entry_dispose(map, entry);
5395
5396 vm_map_unlock(map);
5397 /*
5398 * Deallocate the object only after removing all
5399 * pmap entries pointing to its pages.
5400 */
5401 if (submap)
5402 vm_map_deallocate(submap);
5403 else
5404 vm_object_deallocate(object);
5405
5406 }
5407
5408 void
5409 vm_map_submap_pmap_clean(
5410 vm_map_t map,
5411 vm_map_offset_t start,
5412 vm_map_offset_t end,
5413 vm_map_t sub_map,
5414 vm_map_offset_t offset)
5415 {
5416 vm_map_offset_t submap_start;
5417 vm_map_offset_t submap_end;
5418 vm_map_size_t remove_size;
5419 vm_map_entry_t entry;
5420
5421 submap_end = offset + (end - start);
5422 submap_start = offset;
5423
5424 vm_map_lock_read(sub_map);
5425 if(vm_map_lookup_entry(sub_map, offset, &entry)) {
5426
5427 remove_size = (entry->vme_end - entry->vme_start);
5428 if(offset > entry->vme_start)
5429 remove_size -= offset - entry->vme_start;
5430
5431
5432 if(submap_end < entry->vme_end) {
5433 remove_size -=
5434 entry->vme_end - submap_end;
5435 }
5436 if(entry->is_sub_map) {
5437 vm_map_submap_pmap_clean(
5438 sub_map,
5439 start,
5440 start + remove_size,
5441 entry->object.sub_map,
5442 entry->offset);
5443 } else {
5444
5445 if((map->mapped_in_other_pmaps) && (map->ref_count)
5446 && (entry->object.vm_object != NULL)) {
5447 vm_object_pmap_protect(
5448 entry->object.vm_object,
5449 entry->offset+(offset-entry->vme_start),
5450 remove_size,
5451 PMAP_NULL,
5452 entry->vme_start,
5453 VM_PROT_NONE);
5454 } else {
5455 pmap_remove(map->pmap,
5456 (addr64_t)start,
5457 (addr64_t)(start + remove_size));
5458 }
5459 }
5460 }
5461
5462 entry = entry->vme_next;
5463
5464 while((entry != vm_map_to_entry(sub_map))
5465 && (entry->vme_start < submap_end)) {
5466 remove_size = (entry->vme_end - entry->vme_start);
5467 if(submap_end < entry->vme_end) {
5468 remove_size -= entry->vme_end - submap_end;
5469 }
5470 if(entry->is_sub_map) {
5471 vm_map_submap_pmap_clean(
5472 sub_map,
5473 (start + entry->vme_start) - offset,
5474 ((start + entry->vme_start) - offset) + remove_size,
5475 entry->object.sub_map,
5476 entry->offset);
5477 } else {
5478 if((map->mapped_in_other_pmaps) && (map->ref_count)
5479 && (entry->object.vm_object != NULL)) {
5480 vm_object_pmap_protect(
5481 entry->object.vm_object,
5482 entry->offset,
5483 remove_size,
5484 PMAP_NULL,
5485 entry->vme_start,
5486 VM_PROT_NONE);
5487 } else {
5488 pmap_remove(map->pmap,
5489 (addr64_t)((start + entry->vme_start)
5490 - offset),
5491 (addr64_t)(((start + entry->vme_start)
5492 - offset) + remove_size));
5493 }
5494 }
5495 entry = entry->vme_next;
5496 }
5497 vm_map_unlock_read(sub_map);
5498 return;
5499 }
5500
5501 /*
5502 * vm_map_delete: [ internal use only ]
5503 *
5504 * Deallocates the given address range from the target map.
5505 * Removes all user wirings. Unwires one kernel wiring if
5506 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
5507 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
5508 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
5509 *
5510 * This routine is called with map locked and leaves map locked.
5511 */
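/*
 * Usage sketch (illustrative): with the map already locked, remove a range
 * and drop one kernel wiring on the way out:
 *
 *	kr = vm_map_delete(map, start, end,
 *			   VM_MAP_REMOVE_KUNWIRE, VM_MAP_NULL);
 *
 * Passing a non-NULL "zap_map" together with VM_MAP_REMOVE_SAVE_ENTRIES
 * transfers the affected entries to that map instead of destroying them.
 */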
5512 static kern_return_t
5513 vm_map_delete(
5514 vm_map_t map,
5515 vm_map_offset_t start,
5516 vm_map_offset_t end,
5517 int flags,
5518 vm_map_t zap_map)
5519 {
5520 vm_map_entry_t entry, next;
5521 struct vm_map_entry *first_entry, tmp_entry;
5522 register vm_map_offset_t s;
5523 register vm_object_t object;
5524 boolean_t need_wakeup;
5525 unsigned int last_timestamp = ~0; /* unlikely value */
5526 int interruptible;
5527
5528 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
5529 THREAD_ABORTSAFE : THREAD_UNINT;
5530
5531 /*
5532 * All our DMA I/O operations in IOKit are currently done by
5533 * wiring through the map entries of the task requesting the I/O.
5534 * Because of this, we must always wait for kernel wirings
5535 * to go away on the entries before deleting them.
5536 *
5537 * Any caller who wants to actually remove a kernel wiring
5538 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
5539 * properly remove one wiring instead of blasting through
5540 * them all.
5541 */
5542 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
5543
5544 while(1) {
5545 /*
5546 * Find the start of the region, and clip it
5547 */
5548 if (vm_map_lookup_entry(map, start, &first_entry)) {
5549 entry = first_entry;
5550 if (map == kalloc_map &&
5551 (entry->vme_start != start ||
5552 entry->vme_end != end)) {
5553 panic("vm_map_delete(%p,0x%llx,0x%llx): "
5554 "mismatched entry %p [0x%llx:0x%llx]\n",
5555 map,
5556 (uint64_t)start,
5557 (uint64_t)end,
5558 entry,
5559 (uint64_t)entry->vme_start,
5560 (uint64_t)entry->vme_end);
5561 }
5562 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */
5563 start = SUPERPAGE_ROUND_DOWN(start);
5564 continue;
5565 }
5566 if (start == entry->vme_start) {
5567 /*
5568 * No need to clip. We don't want to cause
5569 * any unnecessary unnesting in this case...
5570 */
5571 } else {
5572 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
5573 entry->map_aligned &&
5574 !VM_MAP_PAGE_ALIGNED(
5575 start,
5576 VM_MAP_PAGE_MASK(map))) {
5577 /*
5578 * The entry will no longer be
5579 * map-aligned after clipping
5580 * and the caller said it's OK.
5581 */
5582 entry->map_aligned = FALSE;
5583 }
5584 if (map == kalloc_map) {
5585 panic("vm_map_delete(%p,0x%llx,0x%llx):"
5586 " clipping %p at 0x%llx\n",
5587 map,
5588 (uint64_t)start,
5589 (uint64_t)end,
5590 entry,
5591 (uint64_t)start);
5592 }
5593 vm_map_clip_start(map, entry, start);
5594 }
5595
5596 /*
5597 * Fix the lookup hint now, rather than each
5598 * time through the loop.
5599 */
5600 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5601 } else {
5602 if (map->pmap == kernel_pmap &&
5603 map->ref_count != 0) {
5604 panic("vm_map_delete(%p,0x%llx,0x%llx): "
5605 "no map entry at 0x%llx\n",
5606 map,
5607 (uint64_t)start,
5608 (uint64_t)end,
5609 (uint64_t)start);
5610 }
5611 entry = first_entry->vme_next;
5612 }
5613 break;
5614 }
5615 if (entry->superpage_size)
5616 end = SUPERPAGE_ROUND_UP(end);
5617
5618 need_wakeup = FALSE;
5619 /*
5620 * Step through all entries in this region
5621 */
5622 s = entry->vme_start;
5623 while ((entry != vm_map_to_entry(map)) && (s < end)) {
5624 /*
5625 * At this point, we have deleted all the memory entries
5626 * between "start" and "s". We still need to delete
5627 * all memory entries between "s" and "end".
5628 * While we were blocked and the map was unlocked, some
5629 * new memory entries could have been re-allocated between
5630 * "start" and "s" and we don't want to mess with those.
5631 * Some of those entries could even have been re-assembled
5632 * with an entry after "s" (in vm_map_simplify_entry()), so
5633 * we may have to vm_map_clip_start() again.
5634 */
5635
5636 if (entry->vme_start >= s) {
5637 /*
5638 * This entry starts on or after "s"
5639 * so no need to clip its start.
5640 */
5641 } else {
5642 /*
5643 * This entry has been re-assembled by a
5644 * vm_map_simplify_entry(). We need to
5645 * re-clip its start.
5646 */
5647 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
5648 entry->map_aligned &&
5649 !VM_MAP_PAGE_ALIGNED(s,
5650 VM_MAP_PAGE_MASK(map))) {
5651 /*
5652 * The entry will no longer be map-aligned
5653 * after clipping and the caller said it's OK.
5654 */
5655 entry->map_aligned = FALSE;
5656 }
5657 if (map == kalloc_map) {
5658 panic("vm_map_delete(%p,0x%llx,0x%llx): "
5659 "clipping %p at 0x%llx\n",
5660 map,
5661 (uint64_t)start,
5662 (uint64_t)end,
5663 entry,
5664 (uint64_t)s);
5665 }
5666 vm_map_clip_start(map, entry, s);
5667 }
5668 if (entry->vme_end <= end) {
5669 /*
5670 * This entry is going away completely, so no need
5671 * to clip and possibly cause an unnecessary unnesting.
5672 */
5673 } else {
5674 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
5675 entry->map_aligned &&
5676 !VM_MAP_PAGE_ALIGNED(end,
5677 VM_MAP_PAGE_MASK(map))) {
5678 /*
5679 * The entry will no longer be map-aligned
5680 * after clipping and the caller said it's OK.
5681 */
5682 entry->map_aligned = FALSE;
5683 }
5684 if (map == kalloc_map) {
5685 panic("vm_map_delete(%p,0x%llx,0x%llx): "
5686 "clipping %p at 0x%llx\n",
5687 map,
5688 (uint64_t)start,
5689 (uint64_t)end,
5690 entry,
5691 (uint64_t)end);
5692 }
5693 vm_map_clip_end(map, entry, end);
5694 }
5695
5696 if (entry->permanent) {
5697 panic("attempt to remove permanent VM map entry "
5698 "%p [0x%llx:0x%llx]\n",
5699 entry, (uint64_t) s, (uint64_t) end);
5700 }
5701
5702
5703 if (entry->in_transition) {
5704 wait_result_t wait_result;
5705
5706 /*
5707 * Another thread is wiring/unwiring this entry.
5708 * Let the other thread know we are waiting.
5709 */
5710 assert(s == entry->vme_start);
5711 entry->needs_wakeup = TRUE;
5712
5713 /*
5714 * wake up anybody waiting on entries that we have
5715 * already unwired/deleted.
5716 */
5717 if (need_wakeup) {
5718 vm_map_entry_wakeup(map);
5719 need_wakeup = FALSE;
5720 }
5721
5722 wait_result = vm_map_entry_wait(map, interruptible);
5723
5724 if (interruptible &&
5725 wait_result == THREAD_INTERRUPTED) {
5726 /*
5727 * We do not clear the needs_wakeup flag,
5728 * since we cannot tell if we were the only one.
5729 */
5730 return KERN_ABORTED;
5731 }
5732
5733 /*
5734 * The entry could have been clipped or it
5735 * may not exist anymore. Look it up again.
5736 */
5737 if (!vm_map_lookup_entry(map, s, &first_entry)) {
5738 assert((map != kernel_map) &&
5739 (!entry->is_sub_map));
5740 /*
5741 * User: use the next entry
5742 */
5743 entry = first_entry->vme_next;
5744 s = entry->vme_start;
5745 } else {
5746 entry = first_entry;
5747 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5748 }
5749 last_timestamp = map->timestamp;
5750 continue;
5751 } /* end in_transition */
5752
5753 if (entry->wired_count) {
5754 boolean_t user_wire;
5755
5756 user_wire = entry->user_wired_count > 0;
5757
5758 /*
5759 * Remove a kernel wiring if requested
5760 */
5761 if (flags & VM_MAP_REMOVE_KUNWIRE) {
5762 entry->wired_count--;
5763 }
5764
5765 /*
5766 * Remove all user wirings for proper accounting
5767 */
5768 if (entry->user_wired_count > 0) {
5769 while (entry->user_wired_count)
5770 subtract_wire_counts(map, entry, user_wire);
5771 }
5772
5773 if (entry->wired_count != 0) {
5774 assert(map != kernel_map);
5775 /*
5776 * Cannot continue. Typical case is when
5777 * a user thread has physical I/O pending
5778 * on this page. Either wait for the
5779 * kernel wiring to go away or return an
5780 * error.
5781 */
5782 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
5783 wait_result_t wait_result;
5784
5785 assert(s == entry->vme_start);
5786 entry->needs_wakeup = TRUE;
5787 wait_result = vm_map_entry_wait(map,
5788 interruptible);
5789
5790 if (interruptible &&
5791 wait_result == THREAD_INTERRUPTED) {
5792 /*
5793 * We do not clear the
5794 * needs_wakeup flag, since we
5795 * cannot tell if we were the
5796 * only one.
5797 */
5798 return KERN_ABORTED;
5799 }
5800
5801 /*
5802 * The entry could have been clipped or
5803 * it may not exist anymore. Look it
5804 * up again.
5805 */
5806 if (!vm_map_lookup_entry(map, s,
5807 &first_entry)) {
5808 assert(map != kernel_map);
5809 /*
5810 * User: use the next entry
5811 */
5812 entry = first_entry->vme_next;
5813 s = entry->vme_start;
5814 } else {
5815 entry = first_entry;
5816 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5817 }
5818 last_timestamp = map->timestamp;
5819 continue;
5820 }
5821 else {
5822 return KERN_FAILURE;
5823 }
5824 }
5825
5826 entry->in_transition = TRUE;
5827 /*
5828 * copy current entry. see comment in vm_map_wire()
5829 */
5830 tmp_entry = *entry;
5831 assert(s == entry->vme_start);
5832
5833 /*
5834 * We can unlock the map now. The in_transition
5835 * state guarantees the existence of the entry.
5836 */
5837 vm_map_unlock(map);
5838
5839 if (tmp_entry.is_sub_map) {
5840 vm_map_t sub_map;
5841 vm_map_offset_t sub_start, sub_end;
5842 pmap_t pmap;
5843 vm_map_offset_t pmap_addr;
5844
5845
5846 sub_map = tmp_entry.object.sub_map;
5847 sub_start = tmp_entry.offset;
5848 sub_end = sub_start + (tmp_entry.vme_end -
5849 tmp_entry.vme_start);
5850 if (tmp_entry.use_pmap) {
5851 pmap = sub_map->pmap;
5852 pmap_addr = tmp_entry.vme_start;
5853 } else {
5854 pmap = map->pmap;
5855 pmap_addr = tmp_entry.vme_start;
5856 }
5857 (void) vm_map_unwire_nested(sub_map,
5858 sub_start, sub_end,
5859 user_wire,
5860 pmap, pmap_addr);
5861 } else {
5862
5863 if (tmp_entry.object.vm_object == kernel_object) {
5864 pmap_protect_options(
5865 map->pmap,
5866 tmp_entry.vme_start,
5867 tmp_entry.vme_end,
5868 VM_PROT_NONE,
5869 PMAP_OPTIONS_REMOVE,
5870 NULL);
5871 }
5872 vm_fault_unwire(map, &tmp_entry,
5873 tmp_entry.object.vm_object == kernel_object,
5874 map->pmap, tmp_entry.vme_start);
5875 }
5876
5877 vm_map_lock(map);
5878
5879 if (last_timestamp+1 != map->timestamp) {
5880 /*
5881 * Find the entry again. It could have
5882 * been clipped after we unlocked the map.
5883 */
5884 if (!vm_map_lookup_entry(map, s, &first_entry)){
5885 assert((map != kernel_map) &&
5886 (!entry->is_sub_map));
5887 first_entry = first_entry->vme_next;
5888 s = first_entry->vme_start;
5889 } else {
5890 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5891 }
5892 } else {
5893 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
5894 first_entry = entry;
5895 }
5896
5897 last_timestamp = map->timestamp;
5898
5899 entry = first_entry;
5900 while ((entry != vm_map_to_entry(map)) &&
5901 (entry->vme_start < tmp_entry.vme_end)) {
5902 assert(entry->in_transition);
5903 entry->in_transition = FALSE;
5904 if (entry->needs_wakeup) {
5905 entry->needs_wakeup = FALSE;
5906 need_wakeup = TRUE;
5907 }
5908 entry = entry->vme_next;
5909 }
5910 /*
5911 * We have unwired the entry(s). Go back and
5912 * delete them.
5913 */
5914 entry = first_entry;
5915 continue;
5916 }
5917
5918 /* entry is unwired */
5919 assert(entry->wired_count == 0);
5920 assert(entry->user_wired_count == 0);
5921
5922 assert(s == entry->vme_start);
5923
5924 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
5925 /*
5926 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
5927 * vm_map_delete(), some map entries might have been
5928 * transferred to a "zap_map", which doesn't have a
5929 * pmap. The original pmap has already been flushed
5930 * in the vm_map_delete() call targeting the original
5931 * map, but when we get to destroying the "zap_map",
5932 * we don't have any pmap to flush, so let's just skip
5933 * all this.
5934 */
5935 } else if (entry->is_sub_map) {
5936 if (entry->use_pmap) {
5937 #ifndef NO_NESTED_PMAP
5938 pmap_unnest(map->pmap,
5939 (addr64_t)entry->vme_start,
5940 entry->vme_end - entry->vme_start);
5941 #endif /* NO_NESTED_PMAP */
5942 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
5943 /* clean up parent map/maps */
5944 vm_map_submap_pmap_clean(
5945 map, entry->vme_start,
5946 entry->vme_end,
5947 entry->object.sub_map,
5948 entry->offset);
5949 }
5950 } else {
5951 vm_map_submap_pmap_clean(
5952 map, entry->vme_start, entry->vme_end,
5953 entry->object.sub_map,
5954 entry->offset);
5955 }
5956 } else if (entry->object.vm_object != kernel_object &&
5957 entry->object.vm_object != compressor_object) {
5958 object = entry->object.vm_object;
5959 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
5960 vm_object_pmap_protect_options(
5961 object, entry->offset,
5962 entry->vme_end - entry->vme_start,
5963 PMAP_NULL,
5964 entry->vme_start,
5965 VM_PROT_NONE,
5966 PMAP_OPTIONS_REMOVE);
5967 } else if ((entry->object.vm_object !=
5968 VM_OBJECT_NULL) ||
5969 (map->pmap == kernel_pmap)) {
5970 /* Remove translations associated
5971 * with this range unless the entry
5972 * does not have an object. For the
5973 * kernel map or a descendant, remove
5974 * them even without an object, since
5975 * the platform could potentially create
5976 * "backdoor" mappings invisible to the
5977 * VM. It is expected that objectless,
5978 * non-kernel ranges do not have such
5979 * VM-invisible translations.
5980 */
5981 pmap_remove_options(map->pmap,
5982 (addr64_t)entry->vme_start,
5983 (addr64_t)entry->vme_end,
5984 PMAP_OPTIONS_REMOVE);
5985 }
5986 }
5987
5988 if (entry->iokit_acct) {
5989 /* alternate accounting */
5990 vm_map_iokit_unmapped_region(map,
5991 (entry->vme_end -
5992 entry->vme_start));
5993 entry->iokit_acct = FALSE;
5994 }
5995
5996 /*
5997 * All pmap mappings for this map entry must have been
5998 * cleared by now.
5999 */
6000 #if DEBUG
6001 assert(vm_map_pmap_is_empty(map,
6002 entry->vme_start,
6003 entry->vme_end));
6004 #endif /* DEBUG */
6005
6006 next = entry->vme_next;
6007
6008 if (map->pmap == kernel_pmap &&
6009 map->ref_count != 0 &&
6010 entry->vme_end < end &&
6011 (next == vm_map_to_entry(map) ||
6012 next->vme_start != entry->vme_end)) {
6013 panic("vm_map_delete(%p,0x%llx,0x%llx): "
6014 "hole after %p at 0x%llx\n",
6015 map,
6016 (uint64_t)start,
6017 (uint64_t)end,
6018 entry,
6019 (uint64_t)entry->vme_end);
6020 }
6021
6022 s = next->vme_start;
6023 last_timestamp = map->timestamp;
6024
6025 if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
6026 zap_map != VM_MAP_NULL) {
6027 vm_map_size_t entry_size;
6028 /*
6029 * The caller wants to save the affected VM map entries
6030 * into the "zap_map". The caller will take care of
6031 * these entries.
6032 */
6033 /* unlink the entry from "map" ... */
6034 vm_map_store_entry_unlink(map, entry);
6035 /* ... and add it to the end of the "zap_map" */
6036 vm_map_store_entry_link(zap_map,
6037 vm_map_last_entry(zap_map),
6038 entry);
6039 entry_size = entry->vme_end - entry->vme_start;
6040 map->size -= entry_size;
6041 zap_map->size += entry_size;
6042 /* we didn't unlock the map, so no timestamp increase */
6043 last_timestamp--;
6044 } else {
6045 vm_map_entry_delete(map, entry);
6046 /* vm_map_entry_delete unlocks the map */
6047 vm_map_lock(map);
6048 }
6049
6050 entry = next;
6051
6052 if(entry == vm_map_to_entry(map)) {
6053 break;
6054 }
6055 if (last_timestamp+1 != map->timestamp) {
6056 /*
6057 * We are responsible for deleting everything
6058 * from the given space. If someone has interfered,
6059 * we pick up where we left off; back fills should
6060 * be all right for anyone except map_delete, and
6061 * we have to assume that the task has been fully
6062 * disabled before we get here.
6063 */
6064 if (!vm_map_lookup_entry(map, s, &entry)){
6065 entry = entry->vme_next;
6066 s = entry->vme_start;
6067 } else {
6068 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
6069 }
6070 /*
6071 * Others can not only allocate behind us, we can
6072 * also see entries coalesce while we don't have the map lock.
6073 */
6074 if(entry == vm_map_to_entry(map)) {
6075 break;
6076 }
6077 }
6078 last_timestamp = map->timestamp;
6079 }
6080
6081 if (map->wait_for_space)
6082 thread_wakeup((event_t) map);
6083 /*
6084 * wake up anybody waiting on entries that we have already deleted.
6085 */
6086 if (need_wakeup)
6087 vm_map_entry_wakeup(map);
6088
6089 return KERN_SUCCESS;
6090 }
6091
6092 /*
6093 * vm_map_remove:
6094 *
6095 * Remove the given address range from the target map.
6096 * This is the exported form of vm_map_delete.
6097 */
6098 kern_return_t
6099 vm_map_remove(
6100 register vm_map_t map,
6101 register vm_map_offset_t start,
6102 register vm_map_offset_t end,
6103 register boolean_t flags)
6104 {
6105 register kern_return_t result;
6106
6107 vm_map_lock(map);
6108 VM_MAP_RANGE_CHECK(map, start, end);
6109 /*
6110 * For the zone_map, the kernel controls the allocation/freeing of memory.
6111 * Any free to the zone_map should be within the bounds of the map and
6112 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
6113 * free to the zone_map into a no-op, there is a problem and we should
6114 * panic.
6115 */
6116 if ((map == zone_map) && (start == end))
6117 panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start);
6118 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
6119 vm_map_unlock(map);
6120
6121 return(result);
6122 }
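/*
 * Editorial example (not part of the original source): a minimal sketch of
 * how a kernel client typically tears down a mapping with vm_map_remove().
 * The helper name and the assumption that "addr"/"size" describe an existing
 * mapping are hypothetical.
 */
#if 0 /* illustrative only */
static void
example_remove_range(vm_map_t map, vm_map_offset_t addr, vm_map_size_t size)
{
	kern_return_t kr;

	/* round to the map's page size, as callers in this file do */
	kr = vm_map_remove(map,
			   vm_map_trunc_page(addr, VM_MAP_PAGE_MASK(map)),
			   vm_map_round_page(addr + size, VM_MAP_PAGE_MASK(map)),
			   VM_MAP_NO_FLAGS);
	assert(kr == KERN_SUCCESS);
}
#endif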
6123
6124
6125 /*
6126 * Routine: vm_map_copy_discard
6127 *
6128 * Description:
6129 * Dispose of a map copy object (returned by
6130 * vm_map_copyin).
6131 */
6132 void
6133 vm_map_copy_discard(
6134 vm_map_copy_t copy)
6135 {
6136 if (copy == VM_MAP_COPY_NULL)
6137 return;
6138
6139 switch (copy->type) {
6140 case VM_MAP_COPY_ENTRY_LIST:
6141 while (vm_map_copy_first_entry(copy) !=
6142 vm_map_copy_to_entry(copy)) {
6143 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
6144
6145 vm_map_copy_entry_unlink(copy, entry);
6146 if (entry->is_sub_map) {
6147 vm_map_deallocate(entry->object.sub_map);
6148 } else {
6149 vm_object_deallocate(entry->object.vm_object);
6150 }
6151 vm_map_copy_entry_dispose(copy, entry);
6152 }
6153 break;
6154 case VM_MAP_COPY_OBJECT:
6155 vm_object_deallocate(copy->cpy_object);
6156 break;
6157 case VM_MAP_COPY_KERNEL_BUFFER:
6158
6159 /*
6160 * The vm_map_copy_t and possibly the data buffer were
6161 * allocated by a single call to kalloc(), i.e. the
6162 * vm_map_copy_t was not allocated out of the zone.
6163 */
6164 kfree(copy, copy->cpy_kalloc_size);
6165 return;
6166 }
6167 zfree(vm_map_copy_zone, copy);
6168 }
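/*
 * Editorial example (not part of the original source): the ownership rule
 * this routine supports. A routine that receives a vm_map_copy_t (e.g.
 * out-of-line IPC data) and decides not to use it must discard it itself;
 * on success the copy is consumed by vm_map_copyout() or
 * vm_map_copy_overwrite(). The names example_wants_data and user_addr are
 * hypothetical.
 */
#if 0 /* illustrative fragment; "copy" is the received out-of-line data */
	vm_map_address_t	user_addr = 0;
	kern_return_t		kr;

	if (!example_wants_data) {		/* hypothetical condition */
		vm_map_copy_discard(copy);	/* we still own it */
		return KERN_SUCCESS;
	}
	kr = vm_map_copyout(current_map(), &user_addr, copy);
	/* on success, "copy" has been consumed; do not discard it again */
	return kr;
#endif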
6169
6170 /*
6171 * Routine: vm_map_copy_copy
6172 *
6173 * Description:
6174 * Move the information in a map copy object to
6175 * a new map copy object, leaving the old one
6176 * empty.
6177 *
6178 * This is used by kernel routines that need
6179 * to look at out-of-line data (in copyin form)
6180 * before deciding whether to return SUCCESS.
6181 * If the routine returns FAILURE, the original
6182 * copy object will be deallocated; therefore,
6183 * these routines must make a copy of the copy
6184 * object and leave the original empty so that
6185 * deallocation will not fail.
6186 */
6187 vm_map_copy_t
6188 vm_map_copy_copy(
6189 vm_map_copy_t copy)
6190 {
6191 vm_map_copy_t new_copy;
6192
6193 if (copy == VM_MAP_COPY_NULL)
6194 return VM_MAP_COPY_NULL;
6195
6196 /*
6197 * Allocate a new copy object, and copy the information
6198 * from the old one into it.
6199 */
6200
6201 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
6202 *new_copy = *copy;
6203
6204 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
6205 /*
6206 * The links in the entry chain must be
6207 * changed to point to the new copy object.
6208 */
6209 vm_map_copy_first_entry(copy)->vme_prev
6210 = vm_map_copy_to_entry(new_copy);
6211 vm_map_copy_last_entry(copy)->vme_next
6212 = vm_map_copy_to_entry(new_copy);
6213 }
6214
6215 /*
6216 * Change the old copy object into one that contains
6217 * nothing to be deallocated.
6218 */
6219 copy->type = VM_MAP_COPY_OBJECT;
6220 copy->cpy_object = VM_OBJECT_NULL;
6221
6222 /*
6223 * Return the new object.
6224 */
6225 return new_copy;
6226 }
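/*
 * Editorial example (not part of the original source): the pattern the
 * comment above describes. A server routine that may fail after looking at
 * out-of-line data clones the copy object first, so the caller's later
 * deallocation of the (now empty) original is harmless. The validation
 * routine is hypothetical.
 */
#if 0 /* illustrative fragment; "copy" is the received out-of-line data */
	vm_map_copy_t clone;

	clone = vm_map_copy_copy(copy);		/* "copy" is left empty */
	if (!example_validate(clone)) {		/* hypothetical check */
		vm_map_copy_discard(clone);
		/* caller will discard the empty "copy"; that is now a no-op */
		return KERN_INVALID_ARGUMENT;
	}
	/* keep "clone"; the data now lives there */
#endif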
6227
6228 static kern_return_t
6229 vm_map_overwrite_submap_recurse(
6230 vm_map_t dst_map,
6231 vm_map_offset_t dst_addr,
6232 vm_map_size_t dst_size)
6233 {
6234 vm_map_offset_t dst_end;
6235 vm_map_entry_t tmp_entry;
6236 vm_map_entry_t entry;
6237 kern_return_t result;
6238 boolean_t encountered_sub_map = FALSE;
6239
6240
6241
6242 /*
6243 * Verify that the destination is all writeable
6244 * initially. We have to trunc the destination
6245 * address and round the copy size or we'll end up
6246 * splitting entries in strange ways.
6247 */
6248
6249 dst_end = vm_map_round_page(dst_addr + dst_size,
6250 VM_MAP_PAGE_MASK(dst_map));
6251 vm_map_lock(dst_map);
6252
6253 start_pass_1:
6254 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
6255 vm_map_unlock(dst_map);
6256 return(KERN_INVALID_ADDRESS);
6257 }
6258
6259 vm_map_clip_start(dst_map,
6260 tmp_entry,
6261 vm_map_trunc_page(dst_addr,
6262 VM_MAP_PAGE_MASK(dst_map)));
6263 if (tmp_entry->is_sub_map) {
6264 /* clipping did unnest if needed */
6265 assert(!tmp_entry->use_pmap);
6266 }
6267
6268 for (entry = tmp_entry;;) {
6269 vm_map_entry_t next;
6270
6271 next = entry->vme_next;
6272 while(entry->is_sub_map) {
6273 vm_map_offset_t sub_start;
6274 vm_map_offset_t sub_end;
6275 vm_map_offset_t local_end;
6276
6277 if (entry->in_transition) {
6278 /*
6279 * Say that we are waiting, and wait for entry.
6280 */
6281 entry->needs_wakeup = TRUE;
6282 vm_map_entry_wait(dst_map, THREAD_UNINT);
6283
6284 goto start_pass_1;
6285 }
6286
6287 encountered_sub_map = TRUE;
6288 sub_start = entry->offset;
6289
6290 if(entry->vme_end < dst_end)
6291 sub_end = entry->vme_end;
6292 else
6293 sub_end = dst_end;
6294 sub_end -= entry->vme_start;
6295 sub_end += entry->offset;
6296 local_end = entry->vme_end;
6297 vm_map_unlock(dst_map);
6298
6299 result = vm_map_overwrite_submap_recurse(
6300 entry->object.sub_map,
6301 sub_start,
6302 sub_end - sub_start);
6303
6304 if(result != KERN_SUCCESS)
6305 return result;
6306 if (dst_end <= entry->vme_end)
6307 return KERN_SUCCESS;
6308 vm_map_lock(dst_map);
6309 if(!vm_map_lookup_entry(dst_map, local_end,
6310 &tmp_entry)) {
6311 vm_map_unlock(dst_map);
6312 return(KERN_INVALID_ADDRESS);
6313 }
6314 entry = tmp_entry;
6315 next = entry->vme_next;
6316 }
6317
6318 if ( ! (entry->protection & VM_PROT_WRITE)) {
6319 vm_map_unlock(dst_map);
6320 return(KERN_PROTECTION_FAILURE);
6321 }
6322
6323 /*
6324 * If the entry is in transition, we must wait
6325 * for it to exit that state. Anything could happen
6326 * when we unlock the map, so start over.
6327 */
6328 if (entry->in_transition) {
6329
6330 /*
6331 * Say that we are waiting, and wait for entry.
6332 */
6333 entry->needs_wakeup = TRUE;
6334 vm_map_entry_wait(dst_map, THREAD_UNINT);
6335
6336 goto start_pass_1;
6337 }
6338
6339 /*
6340 * our range is contained completely within this map entry
6341 */
6342 if (dst_end <= entry->vme_end) {
6343 vm_map_unlock(dst_map);
6344 return KERN_SUCCESS;
6345 }
6346 /*
6347 * check that range specified is contiguous region
6348 */
6349 if ((next == vm_map_to_entry(dst_map)) ||
6350 (next->vme_start != entry->vme_end)) {
6351 vm_map_unlock(dst_map);
6352 return(KERN_INVALID_ADDRESS);
6353 }
6354
6355 /*
6356 * Check for permanent objects in the destination.
6357 */
6358 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
6359 ((!entry->object.vm_object->internal) ||
6360 (entry->object.vm_object->true_share))) {
6361 if(encountered_sub_map) {
6362 vm_map_unlock(dst_map);
6363 return(KERN_FAILURE);
6364 }
6365 }
6366
6367
6368 entry = next;
6369 }/* for */
6370 vm_map_unlock(dst_map);
6371 return(KERN_SUCCESS);
6372 }
6373
6374 /*
6375 * Routine: vm_map_copy_overwrite
6376 *
6377 * Description:
6378 * Copy the memory described by the map copy
6379 * object (copy; returned by vm_map_copyin) onto
6380 * the specified destination region (dst_map, dst_addr).
6381 * The destination must be writeable.
6382 *
6383 * Unlike vm_map_copyout, this routine actually
6384 * writes over previously-mapped memory. If the
6385 * previous mapping was to a permanent (user-supplied)
6386 * memory object, it is preserved.
6387 *
6388 * The attributes (protection and inheritance) of the
6389 * destination region are preserved.
6390 *
6391 * If successful, consumes the copy object.
6392 * Otherwise, the caller is responsible for it.
6393 *
6394 * Implementation notes:
6395 * To overwrite aligned temporary virtual memory, it is
6396 * sufficient to remove the previous mapping and insert
6397 * the new copy. This replacement is done either on
6398 * the whole region (if no permanent virtual memory
6399 * objects are embedded in the destination region) or
6400 * in individual map entries.
6401 *
6402 * To overwrite permanent virtual memory, it is necessary
6403 * to copy each page, as the external memory management
6404 * interface currently does not provide any optimizations.
6405 *
6406 * Unaligned memory also has to be copied. It is possible
6407 * to use 'vm_trickery' to copy the aligned data. This is
6408 * not done but not hard to implement.
6409 *
6410 * Once a page of permanent memory has been overwritten,
6411 * it is impossible to interrupt this function; otherwise,
6412 * the call would be neither atomic nor location-independent.
6413 * The kernel-state portion of a user thread must be
6414 * interruptible.
6415 *
6416 * It may be expensive to forward all requests that might
6417 * overwrite permanent memory (vm_write, vm_copy) to
6418 * uninterruptible kernel threads. This routine may be
6419 * called by interruptible threads; however, success is
6420 * not guaranteed -- if the request cannot be performed
6421 * atomically and interruptibly, an error indication is
6422 * returned.
6423 */
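/*
 * Editorial example (not part of the original source): a minimal sketch of
 * the usual copyin/overwrite pairing described above. The helper name is
 * hypothetical; on success the copy object is consumed, on failure it is
 * still owned by the caller.
 */
#if 0 /* illustrative only */
static kern_return_t
example_overwrite_range(
	vm_map_t		src_map,
	vm_map_address_t	src_addr,
	vm_map_t		dst_map,
	vm_map_address_t	dst_addr,
	vm_map_size_t		len)
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	kr = vm_map_copyin(src_map, src_addr, len,
			   FALSE,		/* don't destroy the source */
			   &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	kr = vm_map_copy_overwrite(dst_map, dst_addr, copy,
				   FALSE);	/* not interruptible */
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(copy);	/* failure: copy is still ours */
	return kr;				/* success: copy was consumed */
}
#endif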
6424
6425 static kern_return_t
6426 vm_map_copy_overwrite_nested(
6427 vm_map_t dst_map,
6428 vm_map_address_t dst_addr,
6429 vm_map_copy_t copy,
6430 boolean_t interruptible,
6431 pmap_t pmap,
6432 boolean_t discard_on_success)
6433 {
6434 vm_map_offset_t dst_end;
6435 vm_map_entry_t tmp_entry;
6436 vm_map_entry_t entry;
6437 kern_return_t kr;
6438 boolean_t aligned = TRUE;
6439 boolean_t contains_permanent_objects = FALSE;
6440 boolean_t encountered_sub_map = FALSE;
6441 vm_map_offset_t base_addr;
6442 vm_map_size_t copy_size;
6443 vm_map_size_t total_size;
6444
6445
6446 /*
6447 * Check for null copy object.
6448 */
6449
6450 if (copy == VM_MAP_COPY_NULL)
6451 return(KERN_SUCCESS);
6452
6453 /*
6454 * Check for special kernel buffer allocated
6455 * by new_ipc_kmsg_copyin.
6456 */
6457
6458 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
6459 return(vm_map_copyout_kernel_buffer(
6460 dst_map, &dst_addr,
6461 copy, TRUE, discard_on_success));
6462 }
6463
6464 /*
6465 * Only works for entry lists at the moment. Will
6466 * support page lists later.
6467 */
6468
6469 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
6470
6471 if (copy->size == 0) {
6472 if (discard_on_success)
6473 vm_map_copy_discard(copy);
6474 return(KERN_SUCCESS);
6475 }
6476
6477 /*
6478 * Verify that the destination is all writeable
6479 * initially. We have to trunc the destination
6480 * address and round the copy size or we'll end up
6481 * splitting entries in strange ways.
6482 */
6483
6484 if (!VM_MAP_PAGE_ALIGNED(copy->size,
6485 VM_MAP_PAGE_MASK(dst_map)) ||
6486 !VM_MAP_PAGE_ALIGNED(copy->offset,
6487 VM_MAP_PAGE_MASK(dst_map)) ||
6488 !VM_MAP_PAGE_ALIGNED(dst_addr,
6489 VM_MAP_PAGE_MASK(dst_map)))
6490 {
6491 aligned = FALSE;
6492 dst_end = vm_map_round_page(dst_addr + copy->size,
6493 VM_MAP_PAGE_MASK(dst_map));
6494 } else {
6495 dst_end = dst_addr + copy->size;
6496 }
6497
6498 vm_map_lock(dst_map);
6499
6500 /* LP64todo - remove this check when vm_map_commpage64()
6501 * no longer has to stuff in a map_entry for the commpage
6502 * above the map's max_offset.
6503 */
6504 if (dst_addr >= dst_map->max_offset) {
6505 vm_map_unlock(dst_map);
6506 return(KERN_INVALID_ADDRESS);
6507 }
6508
6509 start_pass_1:
6510 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
6511 vm_map_unlock(dst_map);
6512 return(KERN_INVALID_ADDRESS);
6513 }
6514 vm_map_clip_start(dst_map,
6515 tmp_entry,
6516 vm_map_trunc_page(dst_addr,
6517 VM_MAP_PAGE_MASK(dst_map)));
6518 for (entry = tmp_entry;;) {
6519 vm_map_entry_t next = entry->vme_next;
6520
6521 while(entry->is_sub_map) {
6522 vm_map_offset_t sub_start;
6523 vm_map_offset_t sub_end;
6524 vm_map_offset_t local_end;
6525
6526 if (entry->in_transition) {
6527
6528 /*
6529 * Say that we are waiting, and wait for entry.
6530 */
6531 entry->needs_wakeup = TRUE;
6532 vm_map_entry_wait(dst_map, THREAD_UNINT);
6533
6534 goto start_pass_1;
6535 }
6536
6537 local_end = entry->vme_end;
6538 if (!(entry->needs_copy)) {
6539 /* if needs_copy we are a COW submap */
6540 /* in such a case we just replace so */
6541 /* there is no need for the follow- */
6542 /* ing check. */
6543 encountered_sub_map = TRUE;
6544 sub_start = entry->offset;
6545
6546 if(entry->vme_end < dst_end)
6547 sub_end = entry->vme_end;
6548 else
6549 sub_end = dst_end;
6550 sub_end -= entry->vme_start;
6551 sub_end += entry->offset;
6552 vm_map_unlock(dst_map);
6553
6554 kr = vm_map_overwrite_submap_recurse(
6555 entry->object.sub_map,
6556 sub_start,
6557 sub_end - sub_start);
6558 if(kr != KERN_SUCCESS)
6559 return kr;
6560 vm_map_lock(dst_map);
6561 }
6562
6563 if (dst_end <= entry->vme_end)
6564 goto start_overwrite;
6565 if(!vm_map_lookup_entry(dst_map, local_end,
6566 &entry)) {
6567 vm_map_unlock(dst_map);
6568 return(KERN_INVALID_ADDRESS);
6569 }
6570 next = entry->vme_next;
6571 }
6572
6573 if ( ! (entry->protection & VM_PROT_WRITE)) {
6574 vm_map_unlock(dst_map);
6575 return(KERN_PROTECTION_FAILURE);
6576 }
6577
6578 /*
6579 * If the entry is in transition, we must wait
6580 * for it to exit that state. Anything could happen
6581 * when we unlock the map, so start over.
6582 */
6583 if (entry->in_transition) {
6584
6585 /*
6586 * Say that we are waiting, and wait for entry.
6587 */
6588 entry->needs_wakeup = TRUE;
6589 vm_map_entry_wait(dst_map, THREAD_UNINT);
6590
6591 goto start_pass_1;
6592 }
6593
6594 /*
6595 * our range is contained completely within this map entry
6596 */
6597 if (dst_end <= entry->vme_end)
6598 break;
6599 /*
6600 * check that range specified is contiguous region
6601 */
6602 if ((next == vm_map_to_entry(dst_map)) ||
6603 (next->vme_start != entry->vme_end)) {
6604 vm_map_unlock(dst_map);
6605 return(KERN_INVALID_ADDRESS);
6606 }
6607
6608
6609 /*
6610 * Check for permanent objects in the destination.
6611 */
6612 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
6613 ((!entry->object.vm_object->internal) ||
6614 (entry->object.vm_object->true_share))) {
6615 contains_permanent_objects = TRUE;
6616 }
6617
6618 entry = next;
6619 }/* for */
6620
6621 start_overwrite:
6622 /*
6623 * If there are permanent objects in the destination, then
6624 * the copy cannot be interrupted.
6625 */
6626
6627 if (interruptible && contains_permanent_objects) {
6628 vm_map_unlock(dst_map);
6629 return(KERN_FAILURE); /* XXX */
6630 }
6631
6632 /*
6633 *
6634 * Make a second pass, overwriting the data.
6635 * At the beginning of each loop iteration,
6636 * the next entry to be overwritten is "tmp_entry"
6637 * (initially, the value returned from the lookup above),
6638 * and the starting address expected in that entry
6639 * is "start".
6640 */
6641
6642 total_size = copy->size;
6643 if(encountered_sub_map) {
6644 copy_size = 0;
6645 /* re-calculate tmp_entry since we've had the map */
6646 /* unlocked */
6647 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
6648 vm_map_unlock(dst_map);
6649 return(KERN_INVALID_ADDRESS);
6650 }
6651 } else {
6652 copy_size = copy->size;
6653 }
6654
6655 base_addr = dst_addr;
6656 while(TRUE) {
6657 /* deconstruct the copy object and do in parts */
6658 /* only in sub_map, interruptible case */
6659 vm_map_entry_t copy_entry;
6660 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
6661 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
6662 int nentries;
6663 int remaining_entries = 0;
6664 vm_map_offset_t new_offset = 0;
6665
6666 for (entry = tmp_entry; copy_size == 0;) {
6667 vm_map_entry_t next;
6668
6669 next = entry->vme_next;
6670
6671 /* tmp_entry and base address are moved along */
6672 /* each time we encounter a sub-map. Otherwise */
6673 /* entry can outpace tmp_entry, and the copy_size */
6674 /* may reflect the distance between them. */
6675 /* If the current entry is found to be in transition, */
6676 /* we will start over at the beginning or at the last */
6677 /* encounter of a submap, as dictated by base_addr; */
6678 /* we will zero copy_size accordingly. */
6679 if (entry->in_transition) {
6680 /*
6681 * Say that we are waiting, and wait for entry.
6682 */
6683 entry->needs_wakeup = TRUE;
6684 vm_map_entry_wait(dst_map, THREAD_UNINT);
6685
6686 if(!vm_map_lookup_entry(dst_map, base_addr,
6687 &tmp_entry)) {
6688 vm_map_unlock(dst_map);
6689 return(KERN_INVALID_ADDRESS);
6690 }
6691 copy_size = 0;
6692 entry = tmp_entry;
6693 continue;
6694 }
6695 if(entry->is_sub_map) {
6696 vm_map_offset_t sub_start;
6697 vm_map_offset_t sub_end;
6698 vm_map_offset_t local_end;
6699
6700 if (entry->needs_copy) {
6701 /* if this is a COW submap */
6702 /* just back the range with an */
6703 /* anonymous entry */
6704 if(entry->vme_end < dst_end)
6705 sub_end = entry->vme_end;
6706 else
6707 sub_end = dst_end;
6708 if(entry->vme_start < base_addr)
6709 sub_start = base_addr;
6710 else
6711 sub_start = entry->vme_start;
6712 vm_map_clip_end(
6713 dst_map, entry, sub_end);
6714 vm_map_clip_start(
6715 dst_map, entry, sub_start);
6716 assert(!entry->use_pmap);
6717 entry->is_sub_map = FALSE;
6718 vm_map_deallocate(
6719 entry->object.sub_map);
6720 entry->object.sub_map = NULL;
6721 entry->is_shared = FALSE;
6722 entry->needs_copy = FALSE;
6723 entry->offset = 0;
6724 /*
6725 * XXX FBDP
6726 * We should propagate the protections
6727 * of the submap entry here instead
6728 * of forcing them to VM_PROT_ALL...
6729 * Or better yet, we should inherit
6730 * the protection of the copy_entry.
6731 */
6732 entry->protection = VM_PROT_ALL;
6733 entry->max_protection = VM_PROT_ALL;
6734 entry->wired_count = 0;
6735 entry->user_wired_count = 0;
6736 if(entry->inheritance
6737 == VM_INHERIT_SHARE)
6738 entry->inheritance = VM_INHERIT_COPY;
6739 continue;
6740 }
6741 /* first take care of any non-sub_map */
6742 /* entries to send */
6743 if(base_addr < entry->vme_start) {
6744 /* stuff to send */
6745 copy_size =
6746 entry->vme_start - base_addr;
6747 break;
6748 }
6749 sub_start = entry->offset;
6750
6751 if(entry->vme_end < dst_end)
6752 sub_end = entry->vme_end;
6753 else
6754 sub_end = dst_end;
6755 sub_end -= entry->vme_start;
6756 sub_end += entry->offset;
6757 local_end = entry->vme_end;
6758 vm_map_unlock(dst_map);
6759 copy_size = sub_end - sub_start;
6760
6761 /* adjust the copy object */
6762 if (total_size > copy_size) {
6763 vm_map_size_t local_size = 0;
6764 vm_map_size_t entry_size;
6765
6766 nentries = 1;
6767 new_offset = copy->offset;
6768 copy_entry = vm_map_copy_first_entry(copy);
6769 while(copy_entry !=
6770 vm_map_copy_to_entry(copy)){
6771 entry_size = copy_entry->vme_end -
6772 copy_entry->vme_start;
6773 if((local_size < copy_size) &&
6774 ((local_size + entry_size)
6775 >= copy_size)) {
6776 vm_map_copy_clip_end(copy,
6777 copy_entry,
6778 copy_entry->vme_start +
6779 (copy_size - local_size));
6780 entry_size = copy_entry->vme_end -
6781 copy_entry->vme_start;
6782 local_size += entry_size;
6783 new_offset += entry_size;
6784 }
6785 if(local_size >= copy_size) {
6786 next_copy = copy_entry->vme_next;
6787 copy_entry->vme_next =
6788 vm_map_copy_to_entry(copy);
6789 previous_prev =
6790 copy->cpy_hdr.links.prev;
6791 copy->cpy_hdr.links.prev = copy_entry;
6792 copy->size = copy_size;
6793 remaining_entries =
6794 copy->cpy_hdr.nentries;
6795 remaining_entries -= nentries;
6796 copy->cpy_hdr.nentries = nentries;
6797 break;
6798 } else {
6799 local_size += entry_size;
6800 new_offset += entry_size;
6801 nentries++;
6802 }
6803 copy_entry = copy_entry->vme_next;
6804 }
6805 }
6806
6807 if((entry->use_pmap) && (pmap == NULL)) {
6808 kr = vm_map_copy_overwrite_nested(
6809 entry->object.sub_map,
6810 sub_start,
6811 copy,
6812 interruptible,
6813 entry->object.sub_map->pmap,
6814 TRUE);
6815 } else if (pmap != NULL) {
6816 kr = vm_map_copy_overwrite_nested(
6817 entry->object.sub_map,
6818 sub_start,
6819 copy,
6820 interruptible, pmap,
6821 TRUE);
6822 } else {
6823 kr = vm_map_copy_overwrite_nested(
6824 entry->object.sub_map,
6825 sub_start,
6826 copy,
6827 interruptible,
6828 dst_map->pmap,
6829 TRUE);
6830 }
6831 if(kr != KERN_SUCCESS) {
6832 if(next_copy != NULL) {
6833 copy->cpy_hdr.nentries +=
6834 remaining_entries;
6835 copy->cpy_hdr.links.prev->vme_next =
6836 next_copy;
6837 copy->cpy_hdr.links.prev
6838 = previous_prev;
6839 copy->size = total_size;
6840 }
6841 return kr;
6842 }
6843 if (dst_end <= local_end) {
6844 return(KERN_SUCCESS);
6845 }
6846 /* otherwise copy no longer exists, it was */
6847 /* destroyed after successful copy_overwrite */
6848 copy = (vm_map_copy_t)
6849 zalloc(vm_map_copy_zone);
6850 vm_map_copy_first_entry(copy) =
6851 vm_map_copy_last_entry(copy) =
6852 vm_map_copy_to_entry(copy);
6853 copy->type = VM_MAP_COPY_ENTRY_LIST;
6854 copy->offset = new_offset;
6855
6856 /*
6857 * XXX FBDP
6858 * this does not seem to deal with
6859 * the VM map store (R&B tree)
6860 */
6861
6862 total_size -= copy_size;
6863 copy_size = 0;
6864 /* put back remainder of copy in container */
6865 if(next_copy != NULL) {
6866 copy->cpy_hdr.nentries = remaining_entries;
6867 copy->cpy_hdr.links.next = next_copy;
6868 copy->cpy_hdr.links.prev = previous_prev;
6869 copy->size = total_size;
6870 next_copy->vme_prev =
6871 vm_map_copy_to_entry(copy);
6872 next_copy = NULL;
6873 }
6874 base_addr = local_end;
6875 vm_map_lock(dst_map);
6876 if(!vm_map_lookup_entry(dst_map,
6877 local_end, &tmp_entry)) {
6878 vm_map_unlock(dst_map);
6879 return(KERN_INVALID_ADDRESS);
6880 }
6881 entry = tmp_entry;
6882 continue;
6883 }
6884 if (dst_end <= entry->vme_end) {
6885 copy_size = dst_end - base_addr;
6886 break;
6887 }
6888
6889 if ((next == vm_map_to_entry(dst_map)) ||
6890 (next->vme_start != entry->vme_end)) {
6891 vm_map_unlock(dst_map);
6892 return(KERN_INVALID_ADDRESS);
6893 }
6894
6895 entry = next;
6896 }/* for */
6897
6898 next_copy = NULL;
6899 nentries = 1;
6900
6901 /* adjust the copy object */
6902 if (total_size > copy_size) {
6903 vm_map_size_t local_size = 0;
6904 vm_map_size_t entry_size;
6905
6906 new_offset = copy->offset;
6907 copy_entry = vm_map_copy_first_entry(copy);
6908 while(copy_entry != vm_map_copy_to_entry(copy)) {
6909 entry_size = copy_entry->vme_end -
6910 copy_entry->vme_start;
6911 if((local_size < copy_size) &&
6912 ((local_size + entry_size)
6913 >= copy_size)) {
6914 vm_map_copy_clip_end(copy, copy_entry,
6915 copy_entry->vme_start +
6916 (copy_size - local_size));
6917 entry_size = copy_entry->vme_end -
6918 copy_entry->vme_start;
6919 local_size += entry_size;
6920 new_offset += entry_size;
6921 }
6922 if(local_size >= copy_size) {
6923 next_copy = copy_entry->vme_next;
6924 copy_entry->vme_next =
6925 vm_map_copy_to_entry(copy);
6926 previous_prev =
6927 copy->cpy_hdr.links.prev;
6928 copy->cpy_hdr.links.prev = copy_entry;
6929 copy->size = copy_size;
6930 remaining_entries =
6931 copy->cpy_hdr.nentries;
6932 remaining_entries -= nentries;
6933 copy->cpy_hdr.nentries = nentries;
6934 break;
6935 } else {
6936 local_size += entry_size;
6937 new_offset += entry_size;
6938 nentries++;
6939 }
6940 copy_entry = copy_entry->vme_next;
6941 }
6942 }
6943
6944 if (aligned) {
6945 pmap_t local_pmap;
6946
6947 if(pmap)
6948 local_pmap = pmap;
6949 else
6950 local_pmap = dst_map->pmap;
6951
6952 if ((kr = vm_map_copy_overwrite_aligned(
6953 dst_map, tmp_entry, copy,
6954 base_addr, local_pmap)) != KERN_SUCCESS) {
6955 if(next_copy != NULL) {
6956 copy->cpy_hdr.nentries +=
6957 remaining_entries;
6958 copy->cpy_hdr.links.prev->vme_next =
6959 next_copy;
6960 copy->cpy_hdr.links.prev =
6961 previous_prev;
6962 copy->size += copy_size;
6963 }
6964 return kr;
6965 }
6966 vm_map_unlock(dst_map);
6967 } else {
6968 /*
6969 * Performance gain:
6970 *
6971 * if the copy and dst address are misaligned but the same
6972 * offset within the page we can copy_not_aligned the
6973 * misaligned parts and copy aligned the rest. If they are
6974 * aligned but len is unaligned we simply need to copy
6975 * the end bit unaligned. We'll need to split the misaligned
6976 * bits of the region in this case!
6977 */
6978 /* ALWAYS UNLOCKS THE dst_map MAP */
6979 kr = vm_map_copy_overwrite_unaligned(
6980 dst_map,
6981 tmp_entry,
6982 copy,
6983 base_addr,
6984 discard_on_success);
6985 if (kr != KERN_SUCCESS) {
6986 if(next_copy != NULL) {
6987 copy->cpy_hdr.nentries +=
6988 remaining_entries;
6989 copy->cpy_hdr.links.prev->vme_next =
6990 next_copy;
6991 copy->cpy_hdr.links.prev =
6992 previous_prev;
6993 copy->size += copy_size;
6994 }
6995 return kr;
6996 }
6997 }
6998 total_size -= copy_size;
6999 if(total_size == 0)
7000 break;
7001 base_addr += copy_size;
7002 copy_size = 0;
7003 copy->offset = new_offset;
7004 if(next_copy != NULL) {
7005 copy->cpy_hdr.nentries = remaining_entries;
7006 copy->cpy_hdr.links.next = next_copy;
7007 copy->cpy_hdr.links.prev = previous_prev;
7008 next_copy->vme_prev = vm_map_copy_to_entry(copy);
7009 copy->size = total_size;
7010 }
7011 vm_map_lock(dst_map);
7012 while(TRUE) {
7013 if (!vm_map_lookup_entry(dst_map,
7014 base_addr, &tmp_entry)) {
7015 vm_map_unlock(dst_map);
7016 return(KERN_INVALID_ADDRESS);
7017 }
7018 if (tmp_entry->in_transition) {
7019 entry->needs_wakeup = TRUE;
7020 vm_map_entry_wait(dst_map, THREAD_UNINT);
7021 } else {
7022 break;
7023 }
7024 }
7025 vm_map_clip_start(dst_map,
7026 tmp_entry,
7027 vm_map_trunc_page(base_addr,
7028 VM_MAP_PAGE_MASK(dst_map)));
7029
7030 entry = tmp_entry;
7031 } /* while */
7032
7033 /*
7034 * Throw away the vm_map_copy object
7035 */
7036 if (discard_on_success)
7037 vm_map_copy_discard(copy);
7038
7039 return(KERN_SUCCESS);
7040 }/* vm_map_copy_overwrite */
7041
7042 kern_return_t
7043 vm_map_copy_overwrite(
7044 vm_map_t dst_map,
7045 vm_map_offset_t dst_addr,
7046 vm_map_copy_t copy,
7047 boolean_t interruptible)
7048 {
7049 vm_map_size_t head_size, tail_size;
7050 vm_map_copy_t head_copy, tail_copy;
7051 vm_map_offset_t head_addr, tail_addr;
7052 vm_map_entry_t entry;
7053 kern_return_t kr;
7054
7055 head_size = 0;
7056 tail_size = 0;
7057 head_copy = NULL;
7058 tail_copy = NULL;
7059 head_addr = 0;
7060 tail_addr = 0;
7061
7062 if (interruptible ||
7063 copy == VM_MAP_COPY_NULL ||
7064 copy->type != VM_MAP_COPY_ENTRY_LIST) {
7065 /*
7066 * We can't split the "copy" map if we're interruptible
7067 * or if we don't have a "copy" map...
7068 */
7069 blunt_copy:
7070 return vm_map_copy_overwrite_nested(dst_map,
7071 dst_addr,
7072 copy,
7073 interruptible,
7074 (pmap_t) NULL,
7075 TRUE);
7076 }
7077
7078 if (copy->size < 3 * PAGE_SIZE) {
7079 /*
7080 * Too small to bother with optimizing...
7081 */
7082 goto blunt_copy;
7083 }
7084
7085 if ((dst_addr & VM_MAP_PAGE_MASK(dst_map)) !=
7086 (copy->offset & VM_MAP_PAGE_MASK(dst_map))) {
7087 /*
7088 * Incompatible mis-alignment of source and destination...
7089 */
7090 goto blunt_copy;
7091 }
7092
7093 /*
7094 * Proper alignment or identical mis-alignment at the beginning.
7095 * Let's try and do a small unaligned copy first (if needed)
7096 * and then an aligned copy for the rest.
7097 */
7098 if (!page_aligned(dst_addr)) {
7099 head_addr = dst_addr;
7100 head_size = (VM_MAP_PAGE_SIZE(dst_map) -
7101 (copy->offset & VM_MAP_PAGE_MASK(dst_map)));
7102 }
7103 if (!page_aligned(copy->offset + copy->size)) {
7104 /*
7105 * Mis-alignment at the end.
7106 * Do an aligned copy up to the last page and
7107 * then an unaligned copy for the remaining bytes.
7108 */
7109 tail_size = ((copy->offset + copy->size) &
7110 VM_MAP_PAGE_MASK(dst_map));
7111 tail_addr = dst_addr + copy->size - tail_size;
7112 }
7113
7114 if (head_size + tail_size == copy->size) {
7115 /*
7116 * It's all unaligned, no optimization possible...
7117 */
7118 goto blunt_copy;
7119 }
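	/*
	 * Editorial worked example (hypothetical numbers, 4KB pages):
	 * with dst_addr = 0x5200, copy->offset = 0x200, copy->size = 0x4f00:
	 *   head_addr = 0x5200, head_size = 0x1000 - 0x200 = 0xe00
	 *   tail_size = (0x200 + 0x4f00) & 0xfff = 0x100
	 *   tail_addr = 0x5200 + 0x4f00 - 0x100 = 0xa000
	 * The aligned middle [0x6000, 0xa000) goes through the fast path
	 * below; the 0xe00-byte head and the 0x100-byte tail are copied
	 * with the unaligned path.
	 */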
7120
7121 /*
7122 * Can't optimize if there are any submaps in the
7123 * destination due to the way we free the "copy" map
7124 * progressively in vm_map_copy_overwrite_nested()
7125 * in that case.
7126 */
7127 vm_map_lock_read(dst_map);
7128 if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
7129 vm_map_unlock_read(dst_map);
7130 goto blunt_copy;
7131 }
7132 for (;
7133 (entry != vm_map_copy_to_entry(copy) &&
7134 entry->vme_start < dst_addr + copy->size);
7135 entry = entry->vme_next) {
7136 if (entry->is_sub_map) {
7137 vm_map_unlock_read(dst_map);
7138 goto blunt_copy;
7139 }
7140 }
7141 vm_map_unlock_read(dst_map);
7142
7143 if (head_size) {
7144 /*
7145 * Unaligned copy of the first "head_size" bytes, to reach
7146 * a page boundary.
7147 */
7148
7149 /*
7150 * Extract "head_copy" out of "copy".
7151 */
7152 head_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7153 vm_map_copy_first_entry(head_copy) =
7154 vm_map_copy_to_entry(head_copy);
7155 vm_map_copy_last_entry(head_copy) =
7156 vm_map_copy_to_entry(head_copy);
7157 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
7158 head_copy->cpy_hdr.nentries = 0;
7159 head_copy->cpy_hdr.entries_pageable =
7160 copy->cpy_hdr.entries_pageable;
7161 vm_map_store_init(&head_copy->cpy_hdr);
7162
7163 head_copy->offset = copy->offset;
7164 head_copy->size = head_size;
7165
7166 copy->offset += head_size;
7167 copy->size -= head_size;
7168
7169 entry = vm_map_copy_first_entry(copy);
7170 vm_map_copy_clip_end(copy, entry, copy->offset);
7171 vm_map_copy_entry_unlink(copy, entry);
7172 vm_map_copy_entry_link(head_copy,
7173 vm_map_copy_to_entry(head_copy),
7174 entry);
7175
7176 /*
7177 * Do the unaligned copy.
7178 */
7179 kr = vm_map_copy_overwrite_nested(dst_map,
7180 head_addr,
7181 head_copy,
7182 interruptible,
7183 (pmap_t) NULL,
7184 FALSE);
7185 if (kr != KERN_SUCCESS)
7186 goto done;
7187 }
7188
7189 if (tail_size) {
7190 /*
7191 * Extract "tail_copy" out of "copy".
7192 */
7193 tail_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7194 vm_map_copy_first_entry(tail_copy) =
7195 vm_map_copy_to_entry(tail_copy);
7196 vm_map_copy_last_entry(tail_copy) =
7197 vm_map_copy_to_entry(tail_copy);
7198 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
7199 tail_copy->cpy_hdr.nentries = 0;
7200 tail_copy->cpy_hdr.entries_pageable =
7201 copy->cpy_hdr.entries_pageable;
7202 vm_map_store_init(&tail_copy->cpy_hdr);
7203
7204 tail_copy->offset = copy->offset + copy->size - tail_size;
7205 tail_copy->size = tail_size;
7206
7207 copy->size -= tail_size;
7208
7209 entry = vm_map_copy_last_entry(copy);
7210 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
7211 entry = vm_map_copy_last_entry(copy);
7212 vm_map_copy_entry_unlink(copy, entry);
7213 vm_map_copy_entry_link(tail_copy,
7214 vm_map_copy_last_entry(tail_copy),
7215 entry);
7216 }
7217
7218 /*
7219 * Copy most (or possibly all) of the data.
7220 */
7221 kr = vm_map_copy_overwrite_nested(dst_map,
7222 dst_addr + head_size,
7223 copy,
7224 interruptible,
7225 (pmap_t) NULL,
7226 FALSE);
7227 if (kr != KERN_SUCCESS) {
7228 goto done;
7229 }
7230
7231 if (tail_size) {
7232 kr = vm_map_copy_overwrite_nested(dst_map,
7233 tail_addr,
7234 tail_copy,
7235 interruptible,
7236 (pmap_t) NULL,
7237 FALSE);
7238 }
7239
7240 done:
7241 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
7242 if (kr == KERN_SUCCESS) {
7243 /*
7244 * Discard all the copy maps.
7245 */
7246 if (head_copy) {
7247 vm_map_copy_discard(head_copy);
7248 head_copy = NULL;
7249 }
7250 vm_map_copy_discard(copy);
7251 if (tail_copy) {
7252 vm_map_copy_discard(tail_copy);
7253 tail_copy = NULL;
7254 }
7255 } else {
7256 /*
7257 * Re-assemble the original copy map.
7258 */
7259 if (head_copy) {
7260 entry = vm_map_copy_first_entry(head_copy);
7261 vm_map_copy_entry_unlink(head_copy, entry);
7262 vm_map_copy_entry_link(copy,
7263 vm_map_copy_to_entry(copy),
7264 entry);
7265 copy->offset -= head_size;
7266 copy->size += head_size;
7267 vm_map_copy_discard(head_copy);
7268 head_copy = NULL;
7269 }
7270 if (tail_copy) {
7271 entry = vm_map_copy_last_entry(tail_copy);
7272 vm_map_copy_entry_unlink(tail_copy, entry);
7273 vm_map_copy_entry_link(copy,
7274 vm_map_copy_last_entry(copy),
7275 entry);
7276 copy->size += tail_size;
7277 vm_map_copy_discard(tail_copy);
7278 tail_copy = NULL;
7279 }
7280 }
7281 return kr;
7282 }
7283
7284
7285 /*
7286 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
7287 *
7288 * Description:
7289 * Physically copy unaligned data
7290 *
7291 * Implementation:
7292 * Unaligned parts of pages have to be physically copied. We use
7293 * a modified form of vm_fault_copy (which understands non-aligned
7294 * page offsets and sizes) to do the copy. We attempt to copy as
7295 * much memory in one go as possible; however, vm_fault_copy copies
7296 * within one memory object, so we have to find the smallest of
7297 * "amount left", "source object data size" and "target object data
7298 * size". With unaligned data we don't need to split regions, so the
7299 * source (copy) object should be one map entry; the target range may
7300 * be split over multiple map entries, however. In any event we are
7301 * pessimistic about these assumptions.
7302 *
7303 * Assumptions:
7304 * dst_map is locked on entry and is returned locked on success,
7305 * unlocked on error.
7306 */
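/*
 * Editorial worked example (hypothetical numbers): with amount_left =
 * 0x1800, a destination entry offering dst_size = 0xc00 from "start", and a
 * source copy entry offering src_size = 0x2000, one pass through the loop
 * below calls vm_fault_copy() for copy_size = min(0xc00, 0x2000, 0x1800) =
 * 0xc00 bytes; "start" and src_offset then advance by 0xc00 and the next
 * destination entry is picked up.
 */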
7307
7308 static kern_return_t
7309 vm_map_copy_overwrite_unaligned(
7310 vm_map_t dst_map,
7311 vm_map_entry_t entry,
7312 vm_map_copy_t copy,
7313 vm_map_offset_t start,
7314 boolean_t discard_on_success)
7315 {
7316 vm_map_entry_t copy_entry;
7317 vm_map_entry_t copy_entry_next;
7318 vm_map_version_t version;
7319 vm_object_t dst_object;
7320 vm_object_offset_t dst_offset;
7321 vm_object_offset_t src_offset;
7322 vm_object_offset_t entry_offset;
7323 vm_map_offset_t entry_end;
7324 vm_map_size_t src_size,
7325 dst_size,
7326 copy_size,
7327 amount_left;
7328 kern_return_t kr = KERN_SUCCESS;
7329
7330
7331 copy_entry = vm_map_copy_first_entry(copy);
7332
7333 vm_map_lock_write_to_read(dst_map);
7334
7335 src_offset = copy->offset - vm_object_trunc_page(copy->offset);
7336 amount_left = copy->size;
7337 /*
7338 * Unaligned, so we never clipped this entry; we need the offset into
7339 * the vm_object, not just the data.
7340 */
7341 while (amount_left > 0) {
7342
7343 if (entry == vm_map_to_entry(dst_map)) {
7344 vm_map_unlock_read(dst_map);
7345 return KERN_INVALID_ADDRESS;
7346 }
7347
7348 /* "start" must be within the current map entry */
7349 assert ((start>=entry->vme_start) && (start<entry->vme_end));
7350
7351 dst_offset = start - entry->vme_start;
7352
7353 dst_size = entry->vme_end - start;
7354
7355 src_size = copy_entry->vme_end -
7356 (copy_entry->vme_start + src_offset);
7357
7358 if (dst_size < src_size) {
7359 /*
7360 * we can only copy dst_size bytes before
7361 * we have to get the next destination entry
7362 */
7363 copy_size = dst_size;
7364 } else {
7365 /*
7366 * we can only copy src_size bytes before
7367 * we have to get the next source copy entry
7368 */
7369 copy_size = src_size;
7370 }
7371
7372 if (copy_size > amount_left) {
7373 copy_size = amount_left;
7374 }
7375 /*
7376 * Entry needs copy; create a shadow object for the
7377 * copy-on-write region.
7378 */
7379 if (entry->needs_copy &&
7380 ((entry->protection & VM_PROT_WRITE) != 0))
7381 {
7382 if (vm_map_lock_read_to_write(dst_map)) {
7383 vm_map_lock_read(dst_map);
7384 goto RetryLookup;
7385 }
7386 vm_object_shadow(&entry->object.vm_object,
7387 &entry->offset,
7388 (vm_map_size_t)(entry->vme_end
7389 - entry->vme_start));
7390 entry->needs_copy = FALSE;
7391 vm_map_lock_write_to_read(dst_map);
7392 }
7393 dst_object = entry->object.vm_object;
7394 /*
7395 * Unlike with the virtual (aligned) copy, we're going
7396 * to fault on it; therefore we need a target object.
7397 */
7398 if (dst_object == VM_OBJECT_NULL) {
7399 if (vm_map_lock_read_to_write(dst_map)) {
7400 vm_map_lock_read(dst_map);
7401 goto RetryLookup;
7402 }
7403 dst_object = vm_object_allocate((vm_map_size_t)
7404 entry->vme_end - entry->vme_start);
7405 entry->object.vm_object = dst_object;
7406 entry->offset = 0;
7407 assert(entry->use_pmap);
7408 vm_map_lock_write_to_read(dst_map);
7409 }
7410 /*
7411 * Take an object reference and unlock map. The "entry" may
7412 * disappear or change when the map is unlocked.
7413 */
7414 vm_object_reference(dst_object);
7415 version.main_timestamp = dst_map->timestamp;
7416 entry_offset = entry->offset;
7417 entry_end = entry->vme_end;
7418 vm_map_unlock_read(dst_map);
7419 /*
7420 * Copy as much as possible in one pass
7421 */
7422 kr = vm_fault_copy(
7423 copy_entry->object.vm_object,
7424 copy_entry->offset + src_offset,
7425 &copy_size,
7426 dst_object,
7427 entry_offset + dst_offset,
7428 dst_map,
7429 &version,
7430 THREAD_UNINT );
7431
7432 start += copy_size;
7433 src_offset += copy_size;
7434 amount_left -= copy_size;
7435 /*
7436 * Release the object reference
7437 */
7438 vm_object_deallocate(dst_object);
7439 /*
7440 * If a hard error occurred, return it now
7441 */
7442 if (kr != KERN_SUCCESS)
7443 return kr;
7444
7445 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
7446 || amount_left == 0)
7447 {
7448 /*
7449 * all done with this copy entry, dispose.
7450 */
7451 copy_entry_next = copy_entry->vme_next;
7452
7453 if (discard_on_success) {
7454 vm_map_copy_entry_unlink(copy, copy_entry);
7455 assert(!copy_entry->is_sub_map);
7456 vm_object_deallocate(
7457 copy_entry->object.vm_object);
7458 vm_map_copy_entry_dispose(copy, copy_entry);
7459 }
7460
7461 if (copy_entry_next == vm_map_copy_to_entry(copy) &&
7462 amount_left) {
7463 /*
7464 * not finished copying but ran out of source
7465 */
7466 return KERN_INVALID_ADDRESS;
7467 }
7468
7469 copy_entry = copy_entry_next;
7470
7471 src_offset = 0;
7472 }
7473
7474 if (amount_left == 0)
7475 return KERN_SUCCESS;
7476
7477 vm_map_lock_read(dst_map);
7478 if (version.main_timestamp == dst_map->timestamp) {
7479 if (start == entry_end) {
7480 /*
7481 * destination region is split. Use the version
7482 * information to avoid a lookup in the normal
7483 * case.
7484 */
7485 entry = entry->vme_next;
7486 /*
7487 * should be contiguous. Fail if we encounter
7488 * a hole in the destination.
7489 */
7490 if (start != entry->vme_start) {
7491 vm_map_unlock_read(dst_map);
7492 return KERN_INVALID_ADDRESS ;
7493 }
7494 }
7495 } else {
7496 /*
7497 * Map version check failed.
7498 * we must lookup the entry because somebody
7499 * might have changed the map behind our backs.
7500 */
7501 RetryLookup:
7502 if (!vm_map_lookup_entry(dst_map, start, &entry))
7503 {
7504 vm_map_unlock_read(dst_map);
7505 return KERN_INVALID_ADDRESS ;
7506 }
7507 }
7508 }/* while */
7509
7510 return KERN_SUCCESS;
7511 }/* vm_map_copy_overwrite_unaligned */
7512
7513 /*
7514 * Routine: vm_map_copy_overwrite_aligned [internal use only]
7515 *
7516 * Description:
7517 * Does all the vm_trickery possible for whole pages.
7518 *
7519 * Implementation:
7520 *
7521 * If there are no permanent objects in the destination,
7522 * and the source and destination map entry zones match,
7523 * and the destination map entry is not shared,
7524 * then the map entries can be deleted and replaced
7525 * with those from the copy. The following code is the
7526 * basic idea of what to do, but there are lots of annoying
7527 * little details about getting protection and inheritance
7528 * right. Should add protection, inheritance, and sharing checks
7529 * to the above pass and make sure that no wiring is involved.
7530 */
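/*
 * Editorial sketch (not part of the original source): the core of the fast
 * path described above, with clipping, pmap cleanup and the malloc()/large-
 * object heuristics omitted. The destination entry is assumed to be
 * temporary, unshared, anonymous memory.
 */
#if 0 /* illustrative fragment; "entry", "copy" and "copy_entry" as below */
	vm_object_deallocate(entry->object.vm_object);	/* drop old backing */
	entry->object = copy_entry->object;		/* install source object */
	entry->offset = copy_entry->offset;
	entry->needs_copy = copy_entry->needs_copy;
	vm_map_copy_entry_unlink(copy, copy_entry);
	vm_map_copy_entry_dispose(copy, copy_entry);
#endif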
7531
7532 int vm_map_copy_overwrite_aligned_src_not_internal = 0;
7533 int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
7534 int vm_map_copy_overwrite_aligned_src_large = 0;
7535
7536 static kern_return_t
7537 vm_map_copy_overwrite_aligned(
7538 vm_map_t dst_map,
7539 vm_map_entry_t tmp_entry,
7540 vm_map_copy_t copy,
7541 vm_map_offset_t start,
7542 __unused pmap_t pmap)
7543 {
7544 vm_object_t object;
7545 vm_map_entry_t copy_entry;
7546 vm_map_size_t copy_size;
7547 vm_map_size_t size;
7548 vm_map_entry_t entry;
7549
7550 while ((copy_entry = vm_map_copy_first_entry(copy))
7551 != vm_map_copy_to_entry(copy))
7552 {
7553 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
7554
7555 entry = tmp_entry;
7556 if (entry->is_sub_map) {
7557 /* unnested when clipped earlier */
7558 assert(!entry->use_pmap);
7559 }
7560 if (entry == vm_map_to_entry(dst_map)) {
7561 vm_map_unlock(dst_map);
7562 return KERN_INVALID_ADDRESS;
7563 }
7564 size = (entry->vme_end - entry->vme_start);
7565 /*
7566 * Make sure that no holes popped up in the
7567 * address map, and that the protection is
7568 * still valid, in case the map was unlocked
7569 * earlier.
7570 */
7571
7572 if ((entry->vme_start != start) || ((entry->is_sub_map)
7573 && !entry->needs_copy)) {
7574 vm_map_unlock(dst_map);
7575 return(KERN_INVALID_ADDRESS);
7576 }
7577 assert(entry != vm_map_to_entry(dst_map));
7578
7579 /*
7580 * Check protection again
7581 */
7582
7583 if ( ! (entry->protection & VM_PROT_WRITE)) {
7584 vm_map_unlock(dst_map);
7585 return(KERN_PROTECTION_FAILURE);
7586 }
7587
7588 /*
7589 * Adjust to source size first
7590 */
7591
7592 if (copy_size < size) {
7593 if (entry->map_aligned &&
7594 !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
7595 VM_MAP_PAGE_MASK(dst_map))) {
7596 /* no longer map-aligned */
7597 entry->map_aligned = FALSE;
7598 }
7599 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
7600 size = copy_size;
7601 }
7602
7603 /*
7604 * Adjust to destination size
7605 */
7606
7607 if (size < copy_size) {
7608 vm_map_copy_clip_end(copy, copy_entry,
7609 copy_entry->vme_start + size);
7610 copy_size = size;
7611 }
7612
7613 assert((entry->vme_end - entry->vme_start) == size);
7614 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
7615 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
7616
7617 /*
7618 * If the destination contains temporary unshared memory,
7619 * we can perform the copy by throwing it away and
7620 * installing the source data.
7621 */
7622
7623 object = entry->object.vm_object;
7624 if ((!entry->is_shared &&
7625 ((object == VM_OBJECT_NULL) ||
7626 (object->internal && !object->true_share))) ||
7627 entry->needs_copy) {
7628 vm_object_t old_object = entry->object.vm_object;
7629 vm_object_offset_t old_offset = entry->offset;
7630 vm_object_offset_t offset;
7631
7632 /*
7633 * Ensure that the source and destination aren't
7634 * identical
7635 */
7636 if (old_object == copy_entry->object.vm_object &&
7637 old_offset == copy_entry->offset) {
7638 vm_map_copy_entry_unlink(copy, copy_entry);
7639 vm_map_copy_entry_dispose(copy, copy_entry);
7640
7641 if (old_object != VM_OBJECT_NULL)
7642 vm_object_deallocate(old_object);
7643
7644 start = tmp_entry->vme_end;
7645 tmp_entry = tmp_entry->vme_next;
7646 continue;
7647 }
7648
7649 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
7650 #define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
7651 if (copy_entry->object.vm_object != VM_OBJECT_NULL &&
7652 copy_entry->object.vm_object->vo_size >= __TRADEOFF1_OBJ_SIZE &&
7653 copy_size <= __TRADEOFF1_COPY_SIZE) {
7654 /*
7655 * Virtual vs. Physical copy tradeoff #1.
7656 *
7657 * Copying only a few pages out of a large
7658 * object: do a physical copy instead of
7659 * a virtual copy, to avoid possibly keeping
7660 * the entire large object alive because of
7661 * those few copy-on-write pages.
7662 */
7663 vm_map_copy_overwrite_aligned_src_large++;
7664 goto slow_copy;
7665 }
7666
7667 if (entry->alias >= VM_MEMORY_MALLOC &&
7668 entry->alias <= VM_MEMORY_MALLOC_LARGE_REUSED) {
7669 vm_object_t new_object, new_shadow;
7670
7671 /*
7672 * We're about to map something over a mapping
7673 * established by malloc()...
7674 */
7675 new_object = copy_entry->object.vm_object;
7676 if (new_object != VM_OBJECT_NULL) {
7677 vm_object_lock_shared(new_object);
7678 }
7679 while (new_object != VM_OBJECT_NULL &&
7680 !new_object->true_share &&
7681 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
7682 new_object->internal) {
7683 new_shadow = new_object->shadow;
7684 if (new_shadow == VM_OBJECT_NULL) {
7685 break;
7686 }
7687 vm_object_lock_shared(new_shadow);
7688 vm_object_unlock(new_object);
7689 new_object = new_shadow;
7690 }
7691 if (new_object != VM_OBJECT_NULL) {
7692 if (!new_object->internal) {
7693 /*
7694 * The new mapping is backed
7695 * by an external object. We
7696 * don't want malloc'ed memory
7697 * to be replaced with such a
7698 * non-anonymous mapping, so
7699 * let's go off the optimized
7700 * path...
7701 */
7702 vm_map_copy_overwrite_aligned_src_not_internal++;
7703 vm_object_unlock(new_object);
7704 goto slow_copy;
7705 }
7706 if (new_object->true_share ||
7707 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
7708 /*
7709 * Same if there's a "true_share"
7710 * object in the shadow chain, or
7711 * an object with a non-default
7712 * (SYMMETRIC) copy strategy.
7713 */
7714 vm_map_copy_overwrite_aligned_src_not_symmetric++;
7715 vm_object_unlock(new_object);
7716 goto slow_copy;
7717 }
7718 vm_object_unlock(new_object);
7719 }
7720 /*
7721 * The new mapping is still backed by
7722 * anonymous (internal) memory, so it's
7723 * OK to substitute it for the original
7724 * malloc() mapping.
7725 */
7726 }
7727
7728 if (old_object != VM_OBJECT_NULL) {
7729 if(entry->is_sub_map) {
7730 if(entry->use_pmap) {
7731 #ifndef NO_NESTED_PMAP
7732 pmap_unnest(dst_map->pmap,
7733 (addr64_t)entry->vme_start,
7734 entry->vme_end - entry->vme_start);
7735 #endif /* NO_NESTED_PMAP */
7736 if(dst_map->mapped_in_other_pmaps) {
7737 /* clean up parent */
7738 /* map/maps */
7739 vm_map_submap_pmap_clean(
7740 dst_map, entry->vme_start,
7741 entry->vme_end,
7742 entry->object.sub_map,
7743 entry->offset);
7744 }
7745 } else {
7746 vm_map_submap_pmap_clean(
7747 dst_map, entry->vme_start,
7748 entry->vme_end,
7749 entry->object.sub_map,
7750 entry->offset);
7751 }
7752 vm_map_deallocate(
7753 entry->object.sub_map);
7754 } else {
7755 if(dst_map->mapped_in_other_pmaps) {
7756 vm_object_pmap_protect_options(
7757 entry->object.vm_object,
7758 entry->offset,
7759 entry->vme_end
7760 - entry->vme_start,
7761 PMAP_NULL,
7762 entry->vme_start,
7763 VM_PROT_NONE,
7764 PMAP_OPTIONS_REMOVE);
7765 } else {
7766 pmap_remove_options(
7767 dst_map->pmap,
7768 (addr64_t)(entry->vme_start),
7769 (addr64_t)(entry->vme_end),
7770 PMAP_OPTIONS_REMOVE);
7771 }
7772 vm_object_deallocate(old_object);
7773 }
7774 }
7775
7776 entry->is_sub_map = FALSE;
7777 entry->object = copy_entry->object;
7778 object = entry->object.vm_object;
7779 entry->needs_copy = copy_entry->needs_copy;
7780 entry->wired_count = 0;
7781 entry->user_wired_count = 0;
7782 offset = entry->offset = copy_entry->offset;
7783
7784 vm_map_copy_entry_unlink(copy, copy_entry);
7785 vm_map_copy_entry_dispose(copy, copy_entry);
7786
7787 /*
7788 * We could try to push pages into the pmap at this point, BUT
7789 * this optimization only saved on average 2 us per page if ALL
7790 * the pages in the source were currently mapped
7791 * and ALL the pages in the dest were touched. If fewer than 2/3
7792 * of the pages were touched, this optimization actually cost more cycles.
7793 * It also puts a lot of pressure on the pmap layer w.r.t. mapping structures.
7794 */
7795
7796 /*
7797 * Set up for the next iteration. The map
7798 * has not been unlocked, so the next
7799 * address should be at the end of this
7800 * entry, and the next map entry should be
7801 * the one following it.
7802 */
7803
7804 start = tmp_entry->vme_end;
7805 tmp_entry = tmp_entry->vme_next;
7806 } else {
7807 vm_map_version_t version;
7808 vm_object_t dst_object;
7809 vm_object_offset_t dst_offset;
7810 kern_return_t r;
7811
7812 slow_copy:
7813 if (entry->needs_copy) {
7814 vm_object_shadow(&entry->object.vm_object,
7815 &entry->offset,
7816 (entry->vme_end -
7817 entry->vme_start));
7818 entry->needs_copy = FALSE;
7819 }
7820
7821 dst_object = entry->object.vm_object;
7822 dst_offset = entry->offset;
7823
7824 /*
7825 * Take an object reference, and record
7826 * the map version information so that the
7827 * map can be safely unlocked.
7828 */
7829
7830 if (dst_object == VM_OBJECT_NULL) {
7831 /*
7832 * We would usually have just taken the
7833 * optimized path above if the destination
7834 * object has not been allocated yet. But we
7835 * now disable that optimization if the copy
7836 * entry's object is not backed by anonymous
7837 * memory to avoid replacing malloc'ed
7838 * (i.e. re-usable) anonymous memory with a
7839 * not-so-anonymous mapping.
7840 * So we have to handle this case here and
7841 * allocate a new VM object for this map entry.
7842 */
7843 dst_object = vm_object_allocate(
7844 entry->vme_end - entry->vme_start);
7845 dst_offset = 0;
7846 entry->object.vm_object = dst_object;
7847 entry->offset = dst_offset;
7848 assert(entry->use_pmap);
7849
7850 }
7851
7852 vm_object_reference(dst_object);
7853
7854 /* account for unlock bumping up timestamp */
7855 version.main_timestamp = dst_map->timestamp + 1;
7856
7857 vm_map_unlock(dst_map);
7858
7859 /*
7860 * Copy as much as possible in one pass
7861 */
7862
7863 copy_size = size;
7864 r = vm_fault_copy(
7865 copy_entry->object.vm_object,
7866 copy_entry->offset,
7867 &copy_size,
7868 dst_object,
7869 dst_offset,
7870 dst_map,
7871 &version,
7872 THREAD_UNINT );
7873
7874 /*
7875 * Release the object reference
7876 */
7877
7878 vm_object_deallocate(dst_object);
7879
7880 /*
7881 * If a hard error occurred, return it now
7882 */
7883
7884 if (r != KERN_SUCCESS)
7885 return(r);
7886
7887 if (copy_size != 0) {
7888 /*
7889 * Dispose of the copied region
7890 */
7891
7892 vm_map_copy_clip_end(copy, copy_entry,
7893 copy_entry->vme_start + copy_size);
7894 vm_map_copy_entry_unlink(copy, copy_entry);
7895 vm_object_deallocate(copy_entry->object.vm_object);
7896 vm_map_copy_entry_dispose(copy, copy_entry);
7897 }
7898
7899 /*
7900 * Pick up in the destination map where we left off.
7901 *
7902 * Use the version information to avoid a lookup
7903 * in the normal case.
7904 */
7905
7906 start += copy_size;
7907 vm_map_lock(dst_map);
7908 if (version.main_timestamp == dst_map->timestamp &&
7909 copy_size != 0) {
7910 /* We can safely use saved tmp_entry value */
7911
7912 if (tmp_entry->map_aligned &&
7913 !VM_MAP_PAGE_ALIGNED(
7914 start,
7915 VM_MAP_PAGE_MASK(dst_map))) {
7916 /* no longer map-aligned */
7917 tmp_entry->map_aligned = FALSE;
7918 }
7919 vm_map_clip_end(dst_map, tmp_entry, start);
7920 tmp_entry = tmp_entry->vme_next;
7921 } else {
7922 /* Must do lookup of tmp_entry */
7923
7924 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
7925 vm_map_unlock(dst_map);
7926 return(KERN_INVALID_ADDRESS);
7927 }
7928 if (tmp_entry->map_aligned &&
7929 !VM_MAP_PAGE_ALIGNED(
7930 start,
7931 VM_MAP_PAGE_MASK(dst_map))) {
7932 /* no longer map-aligned */
7933 tmp_entry->map_aligned = FALSE;
7934 }
7935 vm_map_clip_start(dst_map, tmp_entry, start);
7936 }
7937 }
7938 }/* while */
7939
7940 return(KERN_SUCCESS);
7941 }/* vm_map_copy_overwrite_aligned */
7942
7943 /*
7944 * Routine: vm_map_copyin_kernel_buffer [internal use only]
7945 *
7946 * Description:
7947 * Copy in data to a kernel buffer from space in the
7948 * source map. The original space may be optionally
7949 * deallocated.
7950 *
7951 * If successful, returns a new copy object.
7952 */
7953 static kern_return_t
7954 vm_map_copyin_kernel_buffer(
7955 vm_map_t src_map,
7956 vm_map_offset_t src_addr,
7957 vm_map_size_t len,
7958 boolean_t src_destroy,
7959 vm_map_copy_t *copy_result)
7960 {
7961 kern_return_t kr;
7962 vm_map_copy_t copy;
7963 vm_size_t kalloc_size;
7964
7965 if ((vm_size_t) len != len) {
7966 /* "len" is too big and doesn't fit in a "vm_size_t" */
7967 return KERN_RESOURCE_SHORTAGE;
7968 }
7969 kalloc_size = (vm_size_t) (sizeof(struct vm_map_copy) + len);
7970 assert((vm_map_size_t) kalloc_size == sizeof (struct vm_map_copy) + len);
7971
7972 copy = (vm_map_copy_t) kalloc(kalloc_size);
7973 if (copy == VM_MAP_COPY_NULL) {
7974 return KERN_RESOURCE_SHORTAGE;
7975 }
7976 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
7977 copy->size = len;
7978 copy->offset = 0;
7979 copy->cpy_kdata = (void *) (copy + 1);
7980 copy->cpy_kalloc_size = kalloc_size;
7981
7982 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t) len);
7983 if (kr != KERN_SUCCESS) {
7984 kfree(copy, kalloc_size);
7985 return kr;
7986 }
7987 if (src_destroy) {
7988 (void) vm_map_remove(
7989 src_map,
7990 vm_map_trunc_page(src_addr,
7991 VM_MAP_PAGE_MASK(src_map)),
7992 vm_map_round_page(src_addr + len,
7993 VM_MAP_PAGE_MASK(src_map)),
7994 (VM_MAP_REMOVE_INTERRUPTIBLE |
7995 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
7996 (src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : 0));
7997 }
7998 *copy_result = copy;
7999 return KERN_SUCCESS;
8000 }
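/*
 * Editorial note (not part of the original source): for
 * VM_MAP_COPY_KERNEL_BUFFER copies the payload lives immediately after the
 * vm_map_copy header in the same kalloc() allocation, e.g. for len = 256:
 *
 *	copy = kalloc(sizeof (struct vm_map_copy) + 256);
 *	copy->cpy_kdata = (void *) (copy + 1);	// first payload byte
 *	copy->cpy_kalloc_size = sizeof (struct vm_map_copy) + 256;
 *
 * which is why vm_map_copy_discard() frees such copies with a single
 * kfree(copy, copy->cpy_kalloc_size) rather than zfree().
 */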
8001
8002 /*
8003 * Routine: vm_map_copyout_kernel_buffer [internal use only]
8004 *
8005 * Description:
8006 * Copy out data from a kernel buffer into space in the
8007 * destination map. The space may optionally be dynamically
8008 * allocated.
8009 *
8010 * If successful, consumes the copy object.
8011 * Otherwise, the caller is responsible for it.
8012 */
8013 static int vm_map_copyout_kernel_buffer_failures = 0;
8014 static kern_return_t
8015 vm_map_copyout_kernel_buffer(
8016 vm_map_t map,
8017 vm_map_address_t *addr, /* IN/OUT */
8018 vm_map_copy_t copy,
8019 boolean_t overwrite,
8020 boolean_t consume_on_success)
8021 {
8022 kern_return_t kr = KERN_SUCCESS;
8023 thread_t thread = current_thread();
8024
8025 if (!overwrite) {
8026
8027 /*
8028 * Allocate space in the target map for the data
8029 */
8030 *addr = 0;
8031 kr = vm_map_enter(map,
8032 addr,
8033 vm_map_round_page(copy->size,
8034 VM_MAP_PAGE_MASK(map)),
8035 (vm_map_offset_t) 0,
8036 VM_FLAGS_ANYWHERE,
8037 VM_OBJECT_NULL,
8038 (vm_object_offset_t) 0,
8039 FALSE,
8040 VM_PROT_DEFAULT,
8041 VM_PROT_ALL,
8042 VM_INHERIT_DEFAULT);
8043 if (kr != KERN_SUCCESS)
8044 return kr;
8045 }
8046
8047 /*
8048 * Copyout the data from the kernel buffer to the target map.
8049 */
8050 if (thread->map == map) {
8051
8052 /*
8053 * If the target map is the current map, just do
8054 * the copy.
8055 */
8056 assert((vm_size_t) copy->size == copy->size);
8057 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
8058 kr = KERN_INVALID_ADDRESS;
8059 }
8060 }
8061 else {
8062 vm_map_t oldmap;
8063
8064 /*
8065 * If the target map is another map, assume the
8066 * target's address space identity for the duration
8067 * of the copy.
8068 */
8069 vm_map_reference(map);
8070 oldmap = vm_map_switch(map);
8071
8072 assert((vm_size_t) copy->size == copy->size);
8073 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) {
8074 vm_map_copyout_kernel_buffer_failures++;
8075 kr = KERN_INVALID_ADDRESS;
8076 }
8077
8078 (void) vm_map_switch(oldmap);
8079 vm_map_deallocate(map);
8080 }
8081
8082 if (kr != KERN_SUCCESS) {
8083 /* the copy failed, clean up */
8084 if (!overwrite) {
8085 /*
8086 * Deallocate the space we allocated in the target map.
8087 */
8088 (void) vm_map_remove(
8089 map,
8090 vm_map_trunc_page(*addr,
8091 VM_MAP_PAGE_MASK(map)),
8092 vm_map_round_page((*addr +
8093 vm_map_round_page(copy->size,
8094 VM_MAP_PAGE_MASK(map))),
8095 VM_MAP_PAGE_MASK(map)),
8096 VM_MAP_NO_FLAGS);
8097 *addr = 0;
8098 }
8099 } else {
8100 /* copy was successful, discard the copy structure */
8101 if (consume_on_success) {
8102 kfree(copy, copy->cpy_kalloc_size);
8103 }
8104 }
8105
8106 return kr;
8107 }
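/*
 * Illustrative sketch (not part of the original source): the
 * "assume the target's address space identity" technique used above,
 * shown in a stand-alone form.  The helper name is hypothetical; it
 * simply shows the reference / switch / copyout / switch-back pairing.
 */
#if 0	/* example only, not compiled */
static kern_return_t
example_copyout_to_other_map(
	vm_map_t	target_map,
	const void	*kdata,
	user_addr_t	uaddr,
	vm_size_t	size)
{
	vm_map_t	oldmap;
	kern_return_t	kr = KERN_SUCCESS;

	/* keep the target map alive while we are switched to it */
	vm_map_reference(target_map);
	oldmap = vm_map_switch(target_map);

	if (copyout(kdata, uaddr, size))
		kr = KERN_INVALID_ADDRESS;

	/* restore the original address space and drop our reference */
	(void) vm_map_switch(oldmap);
	vm_map_deallocate(target_map);

	return kr;
}
#endif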
8108
8109 /*
8110 * Macro: vm_map_copy_insert
8111 *
8112 * Description:
8113 * Link a copy chain ("copy") into a map at the
8114 * specified location (after "where").
8115 * Side effects:
8116 * The copy chain is destroyed.
8117 * Warning:
8118 * The arguments are evaluated multiple times.
8119 */
8120 #define vm_map_copy_insert(map, where, copy) \
8121 MACRO_BEGIN \
8122 vm_map_store_copy_insert(map, where, copy); \
8123 zfree(vm_map_copy_zone, copy); \
8124 MACRO_END
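/*
 * Illustrative sketch (not part of the original source): because the
 * macro above expands its arguments more than once, callers should pass
 * simple lvalues rather than expressions with side effects.  The names
 * "dst_map", "insert_after" and "next_pending_copy()" are hypothetical.
 */
#if 0	/* example only, not compiled */
	/* fine: every argument is a plain variable */
	vm_map_copy_insert(dst_map, insert_after, copy);

	/* not fine: the function call would be expanded (and run) twice */
	/* vm_map_copy_insert(dst_map, insert_after, next_pending_copy()); */
#endif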
8125
8126 void
8127 vm_map_copy_remap(
8128 vm_map_t map,
8129 vm_map_entry_t where,
8130 vm_map_copy_t copy,
8131 vm_map_offset_t adjustment,
8132 vm_prot_t cur_prot,
8133 vm_prot_t max_prot,
8134 vm_inherit_t inheritance)
8135 {
8136 vm_map_entry_t copy_entry, new_entry;
8137
8138 for (copy_entry = vm_map_copy_first_entry(copy);
8139 copy_entry != vm_map_copy_to_entry(copy);
8140 copy_entry = copy_entry->vme_next) {
8141 /* get a new VM map entry for the map */
8142 new_entry = vm_map_entry_create(map,
8143 !map->hdr.entries_pageable);
8144 /* copy the "copy entry" to the new entry */
8145 vm_map_entry_copy(new_entry, copy_entry);
8146 /* adjust "start" and "end" */
8147 new_entry->vme_start += adjustment;
8148 new_entry->vme_end += adjustment;
8149 /* clear some attributes */
8150 new_entry->inheritance = inheritance;
8151 new_entry->protection = cur_prot;
8152 new_entry->max_protection = max_prot;
8153 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
8154 /* take an extra reference on the entry's "object" */
8155 if (new_entry->is_sub_map) {
8156 assert(!new_entry->use_pmap); /* not nested */
8157 vm_map_lock(new_entry->object.sub_map);
8158 vm_map_reference(new_entry->object.sub_map);
8159 vm_map_unlock(new_entry->object.sub_map);
8160 } else {
8161 vm_object_reference(new_entry->object.vm_object);
8162 }
8163 /* insert the new entry in the map */
8164 vm_map_store_entry_link(map, where, new_entry);
8165 /* continue inserting the "copy entries" after the new entry */
8166 where = new_entry;
8167 }
8168 }
8169
8170 /*
8171 * Routine: vm_map_copyout
8172 *
8173 * Description:
8174 * Copy out a copy chain ("copy") into newly-allocated
8175 * space in the destination map.
8176 *
8177 * If successful, consumes the copy object.
8178 * Otherwise, the caller is responsible for it.
8179 */
8180
8181 kern_return_t
8182 vm_map_copyout(
8183 vm_map_t dst_map,
8184 vm_map_address_t *dst_addr, /* OUT */
8185 vm_map_copy_t copy)
8186 {
8187 return vm_map_copyout_internal(dst_map, dst_addr, copy,
8188 TRUE, /* consume_on_success */
8189 VM_PROT_DEFAULT,
8190 VM_PROT_ALL,
8191 VM_INHERIT_DEFAULT);
8192 }
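/*
 * Illustrative sketch (not part of the original source): the common
 * pairing of vm_map_copyin() with vm_map_copyout() to move a range
 * from one map into newly allocated space in another.  The helper name
 * and its parameters are hypothetical.
 */
#if 0	/* example only, not compiled */
static kern_return_t
example_transfer_range(
	vm_map_t		src_map,
	vm_map_address_t	src_addr,
	vm_map_size_t		len,
	vm_map_t		dst_map,
	vm_map_address_t	*dst_addr)	/* OUT */
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	kr = vm_map_copyin(src_map, src_addr, len,
			   FALSE,	/* don't destroy the source */
			   &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	kr = vm_map_copyout(dst_map, dst_addr, copy);
	if (kr != KERN_SUCCESS) {
		/* on failure the copy is not consumed; discard it */
		vm_map_copy_discard(copy);
	}
	return kr;
}
#endif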
8193
8194 kern_return_t
8195 vm_map_copyout_internal(
8196 vm_map_t dst_map,
8197 vm_map_address_t *dst_addr, /* OUT */
8198 vm_map_copy_t copy,
8199 boolean_t consume_on_success,
8200 vm_prot_t cur_protection,
8201 vm_prot_t max_protection,
8202 vm_inherit_t inheritance)
8203 {
8204 vm_map_size_t size;
8205 vm_map_size_t adjustment;
8206 vm_map_offset_t start;
8207 vm_object_offset_t vm_copy_start;
8208 vm_map_entry_t last;
8209 vm_map_entry_t entry;
8210
8211 /*
8212 * Check for null copy object.
8213 */
8214
8215 if (copy == VM_MAP_COPY_NULL) {
8216 *dst_addr = 0;
8217 return(KERN_SUCCESS);
8218 }
8219
8220 /*
8221 * Check for special copy object, created
8222 * by vm_map_copyin_object.
8223 */
8224
8225 if (copy->type == VM_MAP_COPY_OBJECT) {
8226 vm_object_t object = copy->cpy_object;
8227 kern_return_t kr;
8228 vm_object_offset_t offset;
8229
8230 offset = vm_object_trunc_page(copy->offset);
8231 size = vm_map_round_page((copy->size +
8232 (vm_map_size_t)(copy->offset -
8233 offset)),
8234 VM_MAP_PAGE_MASK(dst_map));
8235 *dst_addr = 0;
8236 kr = vm_map_enter(dst_map, dst_addr, size,
8237 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
8238 object, offset, FALSE,
8239 VM_PROT_DEFAULT, VM_PROT_ALL,
8240 VM_INHERIT_DEFAULT);
8241 if (kr != KERN_SUCCESS)
8242 return(kr);
8243 /* Account for non-pagealigned copy object */
8244 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
8245 if (consume_on_success)
8246 zfree(vm_map_copy_zone, copy);
8247 return(KERN_SUCCESS);
8248 }
8249
8250 /*
8251 * Check for special kernel buffer allocated
8252 * by new_ipc_kmsg_copyin.
8253 */
8254
8255 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
8256 return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
8257 copy, FALSE,
8258 consume_on_success);
8259 }
8260
8261
8262 /*
8263 * Find space for the data
8264 */
8265
8266 vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
8267 VM_MAP_COPY_PAGE_MASK(copy));
8268 size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size,
8269 VM_MAP_COPY_PAGE_MASK(copy))
8270 - vm_copy_start;
8271
8272
8273 StartAgain: ;
8274
8275 vm_map_lock(dst_map);
8276 if( dst_map->disable_vmentry_reuse == TRUE) {
8277 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
8278 last = entry;
8279 } else {
8280 assert(first_free_is_valid(dst_map));
8281 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
8282 vm_map_min(dst_map) : last->vme_end;
8283 start = vm_map_round_page(start,
8284 VM_MAP_PAGE_MASK(dst_map));
8285 }
8286
8287 while (TRUE) {
8288 vm_map_entry_t next = last->vme_next;
8289 vm_map_offset_t end = start + size;
8290
8291 if ((end > dst_map->max_offset) || (end < start)) {
8292 if (dst_map->wait_for_space) {
8293 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
8294 assert_wait((event_t) dst_map,
8295 THREAD_INTERRUPTIBLE);
8296 vm_map_unlock(dst_map);
8297 thread_block(THREAD_CONTINUE_NULL);
8298 goto StartAgain;
8299 }
8300 }
8301 vm_map_unlock(dst_map);
8302 return(KERN_NO_SPACE);
8303 }
8304
8305 if ((next == vm_map_to_entry(dst_map)) ||
8306 (next->vme_start >= end))
8307 break;
8308
8309 last = next;
8310 start = last->vme_end;
8311 start = vm_map_round_page(start,
8312 VM_MAP_PAGE_MASK(dst_map));
8313 }
8314
8315 adjustment = start - vm_copy_start;
8316 if (! consume_on_success) {
8317 /*
8318 * We're not allowed to consume "copy", so we'll have to
8319 * copy its map entries into the destination map below.
8320 * No need to re-allocate map entries from the correct
8321 * (pageable or not) zone, since we'll get new map entries
8322 * during the transfer.
8323 * We'll also adjust the map entries' "start" and "end"
8324 * during the transfer, to keep "copy"'s entries consistent
8325 * with its "offset".
8326 */
8327 goto after_adjustments;
8328 }
8329
8330 /*
8331 * Since we're going to just drop the map
8332 * entries from the copy into the destination
8333 * map, they must come from the same pool.
8334 */
8335
8336 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
8337 /*
8338 * Mismatches occur when dealing with the default
8339 * pager.
8340 */
8341 zone_t old_zone;
8342 vm_map_entry_t next, new;
8343
8344 /*
8345 * Find the zone that the copies were allocated from
8346 */
8347
8348 entry = vm_map_copy_first_entry(copy);
8349
8350 /*
8351 * Reinitialize the copy so that vm_map_copy_entry_link
8352 * will work.
8353 */
8354 vm_map_store_copy_reset(copy, entry);
8355 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
8356
8357 /*
8358 * Copy each entry.
8359 */
8360 while (entry != vm_map_copy_to_entry(copy)) {
8361 new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
8362 vm_map_entry_copy_full(new, entry);
8363 assert(!new->iokit_acct);
8364 if (new->is_sub_map) {
8365 /* clr address space specifics */
8366 new->use_pmap = FALSE;
8367 }
8368 vm_map_copy_entry_link(copy,
8369 vm_map_copy_last_entry(copy),
8370 new);
8371 next = entry->vme_next;
8372 old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
8373 zfree(old_zone, entry);
8374 entry = next;
8375 }
8376 }
8377
8378 /*
8379 * Adjust the addresses in the copy chain, and
8380 * reset the region attributes.
8381 */
8382
8383 for (entry = vm_map_copy_first_entry(copy);
8384 entry != vm_map_copy_to_entry(copy);
8385 entry = entry->vme_next) {
8386 if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
8387 /*
8388 * We're injecting this copy entry into a map that
8389 * has the standard page alignment, so clear
8390 * "map_aligned" (which might have been inherited
8391 * from the original map entry).
8392 */
8393 entry->map_aligned = FALSE;
8394 }
8395
8396 entry->vme_start += adjustment;
8397 entry->vme_end += adjustment;
8398
8399 if (entry->map_aligned) {
8400 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
8401 VM_MAP_PAGE_MASK(dst_map)));
8402 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
8403 VM_MAP_PAGE_MASK(dst_map)));
8404 }
8405
8406 entry->inheritance = VM_INHERIT_DEFAULT;
8407 entry->protection = VM_PROT_DEFAULT;
8408 entry->max_protection = VM_PROT_ALL;
8409 entry->behavior = VM_BEHAVIOR_DEFAULT;
8410
8411 /*
8412 * If the entry is now wired,
8413 * map the pages into the destination map.
8414 */
8415 if (entry->wired_count != 0) {
8416 register vm_map_offset_t va;
8417 vm_object_offset_t offset;
8418 register vm_object_t object;
8419 vm_prot_t prot;
8420 int type_of_fault;
8421
8422 object = entry->object.vm_object;
8423 offset = entry->offset;
8424 va = entry->vme_start;
8425
8426 pmap_pageable(dst_map->pmap,
8427 entry->vme_start,
8428 entry->vme_end,
8429 TRUE);
8430
8431 while (va < entry->vme_end) {
8432 register vm_page_t m;
8433
8434 /*
8435 * Look up the page in the object.
8436 * Assert that the page will be found in the
8437 * top object:
8438 * either
8439 * the object was newly created by
8440 * vm_object_copy_slowly, and has
8441 * copies of all of the pages from
8442 * the source object
8443 * or
8444 * the object was moved from the old
8445 * map entry; because the old map
8446 * entry was wired, all of the pages
8447 * were in the top-level object.
8448 * (XXX not true if we wire pages for
8449 * reading)
8450 */
8451 vm_object_lock(object);
8452
8453 m = vm_page_lookup(object, offset);
8454 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
8455 m->absent)
8456 panic("vm_map_copyout: wiring %p", m);
8457
8458 /*
8459 * ENCRYPTED SWAP:
8460 * The page is assumed to be wired here, so it
8461 * shouldn't be encrypted. Otherwise, we
8462 * couldn't enter it in the page table, since
8463 * we don't want the user to see the encrypted
8464 * data.
8465 */
8466 ASSERT_PAGE_DECRYPTED(m);
8467
8468 prot = entry->protection;
8469
8470 if (override_nx(dst_map, entry->alias) && prot)
8471 prot |= VM_PROT_EXECUTE;
8472
8473 type_of_fault = DBG_CACHE_HIT_FAULT;
8474
8475 vm_fault_enter(m, dst_map->pmap, va, prot, prot,
8476 VM_PAGE_WIRED(m), FALSE, FALSE,
8477 FALSE, entry->alias,
8478 ((entry->iokit_acct ||
8479 (!entry->is_sub_map &&
8480 !entry->use_pmap))
8481 ? PMAP_OPTIONS_ALT_ACCT
8482 : 0),
8483 NULL, &type_of_fault);
8484
8485 vm_object_unlock(object);
8486
8487 offset += PAGE_SIZE_64;
8488 va += PAGE_SIZE;
8489 }
8490 }
8491 }
8492
8493 after_adjustments:
8494
8495 /*
8496 * Correct the page alignment for the result
8497 */
8498
8499 *dst_addr = start + (copy->offset - vm_copy_start);
8500
8501 /*
8502 * Update the hints and the map size
8503 */
8504
8505 if (consume_on_success) {
8506 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
8507 } else {
8508 SAVE_HINT_MAP_WRITE(dst_map, last);
8509 }
8510
8511 dst_map->size += size;
8512
8513 /*
8514 * Link in the copy
8515 */
8516
8517 if (consume_on_success) {
8518 vm_map_copy_insert(dst_map, last, copy);
8519 } else {
8520 vm_map_copy_remap(dst_map, last, copy, adjustment,
8521 cur_protection, max_protection,
8522 inheritance);
8523 }
8524
8525 vm_map_unlock(dst_map);
8526
8527 /*
8528 * XXX If wiring_required, call vm_map_pageable
8529 */
8530
8531 return(KERN_SUCCESS);
8532 }
8533
8534 /*
8535 * Routine: vm_map_copyin
8536 *
8537 * Description:
8538 * see vm_map_copyin_common. Exported via Unsupported.exports.
8539 *
8540 */
8541
8542 #undef vm_map_copyin
8543
8544 kern_return_t
8545 vm_map_copyin(
8546 vm_map_t src_map,
8547 vm_map_address_t src_addr,
8548 vm_map_size_t len,
8549 boolean_t src_destroy,
8550 vm_map_copy_t *copy_result) /* OUT */
8551 {
8552 return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
8553 FALSE, copy_result, FALSE));
8554 }
8555
8556 /*
8557 * Routine: vm_map_copyin_common
8558 *
8559 * Description:
8560 * Copy the specified region (src_addr, len) from the
8561 * source address space (src_map), possibly removing
8562 * the region from the source address space (src_destroy).
8563 *
8564 * Returns:
8565 * A vm_map_copy_t object (copy_result), suitable for
8566 * insertion into another address space (using vm_map_copyout),
8567 * copying over another address space region (using
8568 * vm_map_copy_overwrite). If the copy is unused, it
8569 * should be destroyed (using vm_map_copy_discard).
8570 *
8571 * In/out conditions:
8572 * The source map should not be locked on entry.
8573 */
8574
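/*
 * Illustrative sketch (not part of the original source): a copy made by
 * vm_map_copyin_common() below can also be used to overwrite an already
 * mapped destination range via vm_map_copy_overwrite(), instead of
 * allocating new space with vm_map_copyout().  The helper name and its
 * parameters are hypothetical; the vm_map_copy_overwrite() prototype is
 * assumed to be the one declared in vm_map.h.
 */
#if 0	/* example only, not compiled */
static kern_return_t
example_overwrite_range(
	vm_map_t		src_map,
	vm_map_address_t	src_addr,
	vm_map_t		dst_map,
	vm_map_address_t	dst_addr,
	vm_map_size_t		len)
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	/* overwrite the existing mapping at [dst_addr, dst_addr + len) */
	kr = vm_map_copy_overwrite(dst_map, dst_addr, copy,
				   FALSE);	/* not interruptible */
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(copy);
	return kr;
}
#endif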
8575 typedef struct submap_map {
8576 vm_map_t parent_map;
8577 vm_map_offset_t base_start;
8578 vm_map_offset_t base_end;
8579 vm_map_size_t base_len;
8580 struct submap_map *next;
8581 } submap_map_t;
8582
8583 kern_return_t
8584 vm_map_copyin_common(
8585 vm_map_t src_map,
8586 vm_map_address_t src_addr,
8587 vm_map_size_t len,
8588 boolean_t src_destroy,
8589 __unused boolean_t src_volatile,
8590 vm_map_copy_t *copy_result, /* OUT */
8591 boolean_t use_maxprot)
8592 {
8593 vm_map_entry_t tmp_entry; /* Result of last map lookup --
8594 * in multi-level lookup, this
8595 * entry contains the actual
8596 * vm_object/offset.
8597 */
8598 register
8599 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
8600
8601 vm_map_offset_t src_start; /* Start of current entry --
8602 * where copy is taking place now
8603 */
8604 vm_map_offset_t src_end; /* End of entire region to be
8605 * copied */
8606 vm_map_offset_t src_base;
8607 vm_map_t base_map = src_map;
8608 boolean_t map_share=FALSE;
8609 submap_map_t *parent_maps = NULL;
8610
8611 register
8612 vm_map_copy_t copy; /* Resulting copy */
8613 vm_map_address_t copy_addr;
8614 vm_map_size_t copy_size;
8615
8616 /*
8617 * Check for copies of zero bytes.
8618 */
8619
8620 if (len == 0) {
8621 *copy_result = VM_MAP_COPY_NULL;
8622 return(KERN_SUCCESS);
8623 }
8624
8625 /*
8626 * Check that the end address doesn't overflow
8627 */
8628 src_end = src_addr + len;
8629 if (src_end < src_addr)
8630 return KERN_INVALID_ADDRESS;
8631
8632 /*
8633 * If the copy is sufficiently small, use a kernel buffer instead
8634 * of making a virtual copy. The theory being that the cost of
8635 * setting up VM (and taking C-O-W faults) dominates the copy costs
8636 * for small regions.
8637 */
8638 if ((len < msg_ool_size_small) && !use_maxprot)
8639 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
8640 src_destroy, copy_result);
8641
8642 /*
8643 * Compute (page aligned) start and end of region
8644 */
8645 src_start = vm_map_trunc_page(src_addr,
8646 VM_MAP_PAGE_MASK(src_map));
8647 src_end = vm_map_round_page(src_end,
8648 VM_MAP_PAGE_MASK(src_map));
8649
8650 XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);
8651
8652 /*
8653 * Allocate a header element for the list.
8654 *
8655 * Use the start and end in the header to
8656 * remember the endpoints prior to rounding.
8657 */
8658
8659 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8660 vm_map_copy_first_entry(copy) =
8661 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
8662 copy->type = VM_MAP_COPY_ENTRY_LIST;
8663 copy->cpy_hdr.nentries = 0;
8664 copy->cpy_hdr.entries_pageable = TRUE;
8665 #if 00
8666 copy->cpy_hdr.page_shift = src_map->hdr.page_shift;
8667 #else
8668 /*
8669 * The copy entries can be broken down for a variety of reasons,
8670 * so we can't guarantee that they will remain map-aligned...
8671 * Will need to adjust the first copy_entry's "vme_start" and
8672 * the last copy_entry's "vme_end" to be rounded to PAGE_MASK
8673 * rather than the original map's alignment.
8674 */
8675 copy->cpy_hdr.page_shift = PAGE_SHIFT;
8676 #endif
8677
8678 vm_map_store_init( &(copy->cpy_hdr) );
8679
8680 copy->offset = src_addr;
8681 copy->size = len;
8682
8683 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
8684
8685 #define RETURN(x) \
8686 MACRO_BEGIN \
8687 vm_map_unlock(src_map); \
8688 if(src_map != base_map) \
8689 vm_map_deallocate(src_map); \
8690 if (new_entry != VM_MAP_ENTRY_NULL) \
8691 vm_map_copy_entry_dispose(copy,new_entry); \
8692 vm_map_copy_discard(copy); \
8693 { \
8694 submap_map_t *_ptr; \
8695 \
8696 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
8697 parent_maps=parent_maps->next; \
8698 if (_ptr->parent_map != base_map) \
8699 vm_map_deallocate(_ptr->parent_map); \
8700 kfree(_ptr, sizeof(submap_map_t)); \
8701 } \
8702 } \
8703 MACRO_RETURN(x); \
8704 MACRO_END
8705
8706 /*
8707 * Find the beginning of the region.
8708 */
8709
8710 vm_map_lock(src_map);
8711
8712 /*
8713 * Lookup the original "src_addr" rather than the truncated
8714 * "src_start", in case "src_start" falls in a non-map-aligned
8715 * map entry *before* the map entry that contains "src_addr"...
8716 */
8717 if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry))
8718 RETURN(KERN_INVALID_ADDRESS);
8719 if(!tmp_entry->is_sub_map) {
8720 /*
8721 * ... but clip to the map-rounded "src_start" rather than
8722 * "src_addr" to preserve map-alignment. We'll adjust the
8723 * first copy entry at the end, if needed.
8724 */
8725 vm_map_clip_start(src_map, tmp_entry, src_start);
8726 }
8727 if (src_start < tmp_entry->vme_start) {
8728 /*
8729 * Move "src_start" up to the start of the
8730 * first map entry to copy.
8731 */
8732 src_start = tmp_entry->vme_start;
8733 }
8734 /* set for later submap fix-up */
8735 copy_addr = src_start;
8736
8737 /*
8738 * Go through entries until we get to the end.
8739 */
8740
8741 while (TRUE) {
8742 register
8743 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
8744 vm_map_size_t src_size; /* Size of source
8745 * map entry (in both
8746 * maps)
8747 */
8748
8749 register
8750 vm_object_t src_object; /* Object to copy */
8751 vm_object_offset_t src_offset;
8752
8753 boolean_t src_needs_copy; /* Should source map
8754 * be made read-only
8755 * for copy-on-write?
8756 */
8757
8758 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
8759
8760 boolean_t was_wired; /* Was source wired? */
8761 vm_map_version_t version; /* Version before locks
8762 * dropped to make copy
8763 */
8764 kern_return_t result; /* Return value from
8765 * copy_strategically.
8766 */
8767 while(tmp_entry->is_sub_map) {
8768 vm_map_size_t submap_len;
8769 submap_map_t *ptr;
8770
8771 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
8772 ptr->next = parent_maps;
8773 parent_maps = ptr;
8774 ptr->parent_map = src_map;
8775 ptr->base_start = src_start;
8776 ptr->base_end = src_end;
8777 submap_len = tmp_entry->vme_end - src_start;
8778 if(submap_len > (src_end-src_start))
8779 submap_len = src_end-src_start;
8780 ptr->base_len = submap_len;
8781
8782 src_start -= tmp_entry->vme_start;
8783 src_start += tmp_entry->offset;
8784 src_end = src_start + submap_len;
8785 src_map = tmp_entry->object.sub_map;
8786 vm_map_lock(src_map);
8787 /* keep an outstanding reference for all maps in */
8788 /* the parent's tree except the base map */
8789 vm_map_reference(src_map);
8790 vm_map_unlock(ptr->parent_map);
8791 if (!vm_map_lookup_entry(
8792 src_map, src_start, &tmp_entry))
8793 RETURN(KERN_INVALID_ADDRESS);
8794 map_share = TRUE;
8795 if(!tmp_entry->is_sub_map)
8796 vm_map_clip_start(src_map, tmp_entry, src_start);
8797 src_entry = tmp_entry;
8798 }
8799 /* we are now in the lowest level submap... */
8800
8801 if ((tmp_entry->object.vm_object != VM_OBJECT_NULL) &&
8802 (tmp_entry->object.vm_object->phys_contiguous)) {
8803 /* This is not supported for now. In future */
8804 /* we will need to detect the phys_contig */
8805 /* condition and then upgrade copy_slowly */
8806 /* to do physical copy from the device mem */
8807 /* based object. We can piggy-back off of */
8808 /* the was_wired boolean to set up the */
8809 /* proper handling */
8810 RETURN(KERN_PROTECTION_FAILURE);
8811 }
8812 /*
8813 * Create a new address map entry to hold the result.
8814 * Fill in the fields from the appropriate source entries.
8815 * We must unlock the source map to do this if we need
8816 * to allocate a map entry.
8817 */
8818 if (new_entry == VM_MAP_ENTRY_NULL) {
8819 version.main_timestamp = src_map->timestamp;
8820 vm_map_unlock(src_map);
8821
8822 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
8823
8824 vm_map_lock(src_map);
8825 if ((version.main_timestamp + 1) != src_map->timestamp) {
8826 if (!vm_map_lookup_entry(src_map, src_start,
8827 &tmp_entry)) {
8828 RETURN(KERN_INVALID_ADDRESS);
8829 }
8830 if (!tmp_entry->is_sub_map)
8831 vm_map_clip_start(src_map, tmp_entry, src_start);
8832 continue; /* restart w/ new tmp_entry */
8833 }
8834 }
8835
8836 /*
8837 * Verify that the region can be read.
8838 */
8839 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
8840 !use_maxprot) ||
8841 (src_entry->max_protection & VM_PROT_READ) == 0)
8842 RETURN(KERN_PROTECTION_FAILURE);
8843
8844 /*
8845 * Clip against the endpoints of the entire region.
8846 */
8847
8848 vm_map_clip_end(src_map, src_entry, src_end);
8849
8850 src_size = src_entry->vme_end - src_start;
8851 src_object = src_entry->object.vm_object;
8852 src_offset = src_entry->offset;
8853 was_wired = (src_entry->wired_count != 0);
8854
8855 vm_map_entry_copy(new_entry, src_entry);
8856 if (new_entry->is_sub_map) {
8857 /* clr address space specifics */
8858 new_entry->use_pmap = FALSE;
8859 }
8860
8861 /*
8862 * Attempt non-blocking copy-on-write optimizations.
8863 */
8864
8865 if (src_destroy &&
8866 (src_object == VM_OBJECT_NULL ||
8867 (src_object->internal && !src_object->true_share
8868 && !map_share))) {
8869 /*
8870 * If we are destroying the source, and the object
8871 * is internal, we can move the object reference
8872 * from the source to the copy. The copy is
8873 * copy-on-write only if the source is.
8874 * We make another reference to the object, because
8875 * destroying the source entry will deallocate it.
8876 */
8877 vm_object_reference(src_object);
8878
8879 /*
8880 * Copy is always unwired. vm_map_copy_entry
8881 * set its wired count to zero.
8882 */
8883
8884 goto CopySuccessful;
8885 }
8886
8887
8888 RestartCopy:
8889 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
8890 src_object, new_entry, new_entry->object.vm_object,
8891 was_wired, 0);
8892 if ((src_object == VM_OBJECT_NULL ||
8893 (!was_wired && !map_share && !tmp_entry->is_shared)) &&
8894 vm_object_copy_quickly(
8895 &new_entry->object.vm_object,
8896 src_offset,
8897 src_size,
8898 &src_needs_copy,
8899 &new_entry_needs_copy)) {
8900
8901 new_entry->needs_copy = new_entry_needs_copy;
8902
8903 /*
8904 * Handle copy-on-write obligations
8905 */
8906
8907 if (src_needs_copy && !tmp_entry->needs_copy) {
8908 vm_prot_t prot;
8909
8910 prot = src_entry->protection & ~VM_PROT_WRITE;
8911
8912 if (override_nx(src_map, src_entry->alias) && prot)
8913 prot |= VM_PROT_EXECUTE;
8914
8915 vm_object_pmap_protect(
8916 src_object,
8917 src_offset,
8918 src_size,
8919 (src_entry->is_shared ?
8920 PMAP_NULL
8921 : src_map->pmap),
8922 src_entry->vme_start,
8923 prot);
8924
8925 tmp_entry->needs_copy = TRUE;
8926 }
8927
8928 /*
8929 * The map has never been unlocked, so it's safe
8930 * to move to the next entry rather than doing
8931 * another lookup.
8932 */
8933
8934 goto CopySuccessful;
8935 }
8936
8937 /*
8938 * Take an object reference, so that we may
8939 * release the map lock(s).
8940 */
8941
8942 assert(src_object != VM_OBJECT_NULL);
8943 vm_object_reference(src_object);
8944
8945 /*
8946 * Record the timestamp for later verification.
8947 * Unlock the map.
8948 */
8949
8950 version.main_timestamp = src_map->timestamp;
8951 vm_map_unlock(src_map); /* Increments timestamp once! */
8952
8953 /*
8954 * Perform the copy
8955 */
8956
8957 if (was_wired) {
8958 CopySlowly:
8959 vm_object_lock(src_object);
8960 result = vm_object_copy_slowly(
8961 src_object,
8962 src_offset,
8963 src_size,
8964 THREAD_UNINT,
8965 &new_entry->object.vm_object);
8966 new_entry->offset = 0;
8967 new_entry->needs_copy = FALSE;
8968
8969 }
8970 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
8971 (tmp_entry->is_shared || map_share)) {
8972 vm_object_t new_object;
8973
8974 vm_object_lock_shared(src_object);
8975 new_object = vm_object_copy_delayed(
8976 src_object,
8977 src_offset,
8978 src_size,
8979 TRUE);
8980 if (new_object == VM_OBJECT_NULL)
8981 goto CopySlowly;
8982
8983 new_entry->object.vm_object = new_object;
8984 new_entry->needs_copy = TRUE;
8985 assert(!new_entry->iokit_acct);
8986 assert(new_object->purgable == VM_PURGABLE_DENY);
8987 new_entry->use_pmap = TRUE;
8988 result = KERN_SUCCESS;
8989
8990 } else {
8991 result = vm_object_copy_strategically(src_object,
8992 src_offset,
8993 src_size,
8994 &new_entry->object.vm_object,
8995 &new_entry->offset,
8996 &new_entry_needs_copy);
8997
8998 new_entry->needs_copy = new_entry_needs_copy;
8999 }
9000
9001 if (result != KERN_SUCCESS &&
9002 result != KERN_MEMORY_RESTART_COPY) {
9003 vm_map_lock(src_map);
9004 RETURN(result);
9005 }
9006
9007 /*
9008 * Throw away the extra reference
9009 */
9010
9011 vm_object_deallocate(src_object);
9012
9013 /*
9014 * Verify that the map has not substantially
9015 * changed while the copy was being made.
9016 */
9017
9018 vm_map_lock(src_map);
9019
9020 if ((version.main_timestamp + 1) == src_map->timestamp)
9021 goto VerificationSuccessful;
9022
9023 /*
9024 * Simple version comparison failed.
9025 *
9026 * Retry the lookup and verify that the
9027 * same object/offset are still present.
9028 *
9029 * [Note: a memory manager that colludes with
9030 * the calling task can detect that we have
9031 * cheated. While the map was unlocked, the
9032 * mapping could have been changed and restored.]
9033 */
9034
9035 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
9036 if (result != KERN_MEMORY_RESTART_COPY) {
9037 vm_object_deallocate(new_entry->object.vm_object);
9038 new_entry->object.vm_object = VM_OBJECT_NULL;
9039 assert(!new_entry->iokit_acct);
9040 new_entry->use_pmap = TRUE;
9041 }
9042 RETURN(KERN_INVALID_ADDRESS);
9043 }
9044
9045 src_entry = tmp_entry;
9046 vm_map_clip_start(src_map, src_entry, src_start);
9047
9048 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
9049 !use_maxprot) ||
9050 ((src_entry->max_protection & VM_PROT_READ) == 0))
9051 goto VerificationFailed;
9052
9053 if (src_entry->vme_end < new_entry->vme_end) {
9054 assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
9055 VM_MAP_COPY_PAGE_MASK(copy)));
9056 new_entry->vme_end = src_entry->vme_end;
9057 src_size = new_entry->vme_end - src_start;
9058 }
9059
9060 if ((src_entry->object.vm_object != src_object) ||
9061 (src_entry->offset != src_offset) ) {
9062
9063 /*
9064 * Verification failed.
9065 *
9066 * Start over with this top-level entry.
9067 */
9068
9069 VerificationFailed: ;
9070
9071 vm_object_deallocate(new_entry->object.vm_object);
9072 tmp_entry = src_entry;
9073 continue;
9074 }
9075
9076 /*
9077 * Verification succeeded.
9078 */
9079
9080 VerificationSuccessful: ;
9081
9082 if (result == KERN_MEMORY_RESTART_COPY)
9083 goto RestartCopy;
9084
9085 /*
9086 * Copy succeeded.
9087 */
9088
9089 CopySuccessful: ;
9090
9091 /*
9092 * Link in the new copy entry.
9093 */
9094
9095 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
9096 new_entry);
9097
9098 /*
9099 * Determine whether the entire region
9100 * has been copied.
9101 */
9102 src_base = src_start;
9103 src_start = new_entry->vme_end;
9104 new_entry = VM_MAP_ENTRY_NULL;
9105 while ((src_start >= src_end) && (src_end != 0)) {
9106 submap_map_t *ptr;
9107
9108 if (src_map == base_map) {
9109 /* back to the top */
9110 break;
9111 }
9112
9113 ptr = parent_maps;
9114 assert(ptr != NULL);
9115 parent_maps = parent_maps->next;
9116
9117 /* fix up the damage we did in that submap */
9118 vm_map_simplify_range(src_map,
9119 src_base,
9120 src_end);
9121
9122 vm_map_unlock(src_map);
9123 vm_map_deallocate(src_map);
9124 vm_map_lock(ptr->parent_map);
9125 src_map = ptr->parent_map;
9126 src_base = ptr->base_start;
9127 src_start = ptr->base_start + ptr->base_len;
9128 src_end = ptr->base_end;
9129 if (!vm_map_lookup_entry(src_map,
9130 src_start,
9131 &tmp_entry) &&
9132 (src_end > src_start)) {
9133 RETURN(KERN_INVALID_ADDRESS);
9134 }
9135 kfree(ptr, sizeof(submap_map_t));
9136 if (parent_maps == NULL)
9137 map_share = FALSE;
9138 src_entry = tmp_entry->vme_prev;
9139 }
9140
9141 if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
9142 (src_start >= src_addr + len) &&
9143 (src_addr + len != 0)) {
9144 /*
9145 * Stop copying now, even though we haven't reached
9146 * "src_end". We'll adjust the end of the last copy
9147 * entry at the end, if needed.
9148 *
9149 * If src_map's alignment is different from the
9150 * system's page-alignment, there could be
9151 * extra non-map-aligned map entries between
9152 * the original (non-rounded) "src_addr + len"
9153 * and the rounded "src_end".
9154 * We do not want to copy those map entries since
9155 * they're not part of the copied range.
9156 */
9157 break;
9158 }
9159
9160 if ((src_start >= src_end) && (src_end != 0))
9161 break;
9162
9163 /*
9164 * Verify that there are no gaps in the region
9165 */
9166
9167 tmp_entry = src_entry->vme_next;
9168 if ((tmp_entry->vme_start != src_start) ||
9169 (tmp_entry == vm_map_to_entry(src_map))) {
9170 RETURN(KERN_INVALID_ADDRESS);
9171 }
9172 }
9173
9174 /*
9175 * If the source should be destroyed, do it now, since the
9176 * copy was successful.
9177 */
9178 if (src_destroy) {
9179 (void) vm_map_delete(
9180 src_map,
9181 vm_map_trunc_page(src_addr,
9182 VM_MAP_PAGE_MASK(src_map)),
9183 src_end,
9184 ((src_map == kernel_map) ?
9185 VM_MAP_REMOVE_KUNWIRE :
9186 VM_MAP_NO_FLAGS),
9187 VM_MAP_NULL);
9188 } else {
9189 /* fix up the damage we did in the base map */
9190 vm_map_simplify_range(
9191 src_map,
9192 vm_map_trunc_page(src_addr,
9193 VM_MAP_PAGE_MASK(src_map)),
9194 vm_map_round_page(src_end,
9195 VM_MAP_PAGE_MASK(src_map)));
9196 }
9197
9198 vm_map_unlock(src_map);
9199
9200 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) {
9201 vm_map_offset_t original_start, original_offset, original_end;
9202
9203 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
9204
9205 /* adjust alignment of first copy_entry's "vme_start" */
9206 tmp_entry = vm_map_copy_first_entry(copy);
9207 if (tmp_entry != vm_map_copy_to_entry(copy)) {
9208 vm_map_offset_t adjustment;
9209
9210 original_start = tmp_entry->vme_start;
9211 original_offset = tmp_entry->offset;
9212
9213 /* map-align the start of the first copy entry... */
9214 adjustment = (tmp_entry->vme_start -
9215 vm_map_trunc_page(
9216 tmp_entry->vme_start,
9217 VM_MAP_PAGE_MASK(src_map)));
9218 tmp_entry->vme_start -= adjustment;
9219 tmp_entry->offset -= adjustment;
9220 copy_addr -= adjustment;
9221 assert(tmp_entry->vme_start < tmp_entry->vme_end);
9222 /* ... adjust for mis-aligned start of copy range */
9223 adjustment =
9224 (vm_map_trunc_page(copy->offset,
9225 PAGE_MASK) -
9226 vm_map_trunc_page(copy->offset,
9227 VM_MAP_PAGE_MASK(src_map)));
9228 if (adjustment) {
9229 assert(page_aligned(adjustment));
9230 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
9231 tmp_entry->vme_start += adjustment;
9232 tmp_entry->offset += adjustment;
9233 copy_addr += adjustment;
9234 assert(tmp_entry->vme_start < tmp_entry->vme_end);
9235 }
9236
9237 /*
9238 * Assert that the adjustments haven't exposed
9239 * more than was originally copied...
9240 */
9241 assert(tmp_entry->vme_start >= original_start);
9242 assert(tmp_entry->offset >= original_offset);
9243 /*
9244 * ... and that it did not adjust outside of
9245 * a single 16K page.
9246 */
9247 assert(vm_map_trunc_page(tmp_entry->vme_start,
9248 VM_MAP_PAGE_MASK(src_map)) ==
9249 vm_map_trunc_page(original_start,
9250 VM_MAP_PAGE_MASK(src_map)));
9251 }
9252
9253 /* adjust alignment of last copy_entry's "vme_end" */
9254 tmp_entry = vm_map_copy_last_entry(copy);
9255 if (tmp_entry != vm_map_copy_to_entry(copy)) {
9256 vm_map_offset_t adjustment;
9257
9258 original_end = tmp_entry->vme_end;
9259
9260 /* map-align the end of the last copy entry... */
9261 tmp_entry->vme_end =
9262 vm_map_round_page(tmp_entry->vme_end,
9263 VM_MAP_PAGE_MASK(src_map));
9264 /* ... adjust for mis-aligned end of copy range */
9265 adjustment =
9266 (vm_map_round_page((copy->offset +
9267 copy->size),
9268 VM_MAP_PAGE_MASK(src_map)) -
9269 vm_map_round_page((copy->offset +
9270 copy->size),
9271 PAGE_MASK));
9272 if (adjustment) {
9273 assert(page_aligned(adjustment));
9274 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
9275 tmp_entry->vme_end -= adjustment;
9276 assert(tmp_entry->vme_start < tmp_entry->vme_end);
9277 }
9278
9279 /*
9280 * Assert that the adjustments haven't exposed
9281 * more than was originally copied...
9282 */
9283 assert(tmp_entry->vme_end <= original_end);
9284 /*
9285 * ... and that it did not adjust outside of
9286 * a single 16K page.
9287 */
9288 assert(vm_map_round_page(tmp_entry->vme_end,
9289 VM_MAP_PAGE_MASK(src_map)) ==
9290 vm_map_round_page(original_end,
9291 VM_MAP_PAGE_MASK(src_map)));
9292 }
9293 }
9294
9295 /* Fix-up start and end points in copy. This is necessary */
9296 /* when the various entries in the copy object were picked */
9297 /* up from different sub-maps */
9298
9299 tmp_entry = vm_map_copy_first_entry(copy);
9300 copy_size = 0; /* compute actual size */
9301 while (tmp_entry != vm_map_copy_to_entry(copy)) {
9302 assert(VM_MAP_PAGE_ALIGNED(
9303 copy_addr + (tmp_entry->vme_end -
9304 tmp_entry->vme_start),
9305 VM_MAP_COPY_PAGE_MASK(copy)));
9306 assert(VM_MAP_PAGE_ALIGNED(
9307 copy_addr,
9308 VM_MAP_COPY_PAGE_MASK(copy)));
9309
9310 /*
9311 * The copy_entries will be injected directly into the
9312 * destination map and might not be "map aligned" there...
9313 */
9314 tmp_entry->map_aligned = FALSE;
9315
9316 tmp_entry->vme_end = copy_addr +
9317 (tmp_entry->vme_end - tmp_entry->vme_start);
9318 tmp_entry->vme_start = copy_addr;
9319 assert(tmp_entry->vme_start < tmp_entry->vme_end);
9320 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
9321 copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
9322 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
9323 }
9324
9325 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
9326 copy_size < copy->size) {
9327 /*
9328 * The actual size of the VM map copy is smaller than what
9329 * was requested by the caller. This must be because some
9330 * PAGE_SIZE-sized pages are missing at the end of the last
9331 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
9332 * The caller might not have been aware of those missing
9333 * pages and might not want to be aware of it, which is
9334 * fine as long as they don't try to access (and crash on)
9335 * those missing pages.
9336 * Let's adjust the size of the "copy", to avoid failing
9337 * in vm_map_copyout() or vm_map_copy_overwrite().
9338 */
9339 assert(vm_map_round_page(copy_size,
9340 VM_MAP_PAGE_MASK(src_map)) ==
9341 vm_map_round_page(copy->size,
9342 VM_MAP_PAGE_MASK(src_map)));
9343 copy->size = copy_size;
9344 }
9345
9346 *copy_result = copy;
9347 return(KERN_SUCCESS);
9348
9349 #undef RETURN
9350 }
9351
9352 kern_return_t
9353 vm_map_copy_extract(
9354 vm_map_t src_map,
9355 vm_map_address_t src_addr,
9356 vm_map_size_t len,
9357 vm_map_copy_t *copy_result, /* OUT */
9358 vm_prot_t *cur_prot, /* OUT */
9359 vm_prot_t *max_prot)
9360 {
9361 vm_map_offset_t src_start, src_end;
9362 vm_map_copy_t copy;
9363 kern_return_t kr;
9364
9365 /*
9366 * Check for copies of zero bytes.
9367 */
9368
9369 if (len == 0) {
9370 *copy_result = VM_MAP_COPY_NULL;
9371 return(KERN_SUCCESS);
9372 }
9373
9374 /*
9375 * Check that the end address doesn't overflow
9376 */
9377 src_end = src_addr + len;
9378 if (src_end < src_addr)
9379 return KERN_INVALID_ADDRESS;
9380
9381 /*
9382 * Compute (page aligned) start and end of region
9383 */
9384 src_start = vm_map_trunc_page(src_addr, PAGE_MASK);
9385 src_end = vm_map_round_page(src_end, PAGE_MASK);
9386
9387 /*
9388 * Allocate a header element for the list.
9389 *
9390 * Use the start and end in the header to
9391 * remember the endpoints prior to rounding.
9392 */
9393
9394 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
9395 vm_map_copy_first_entry(copy) =
9396 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
9397 copy->type = VM_MAP_COPY_ENTRY_LIST;
9398 copy->cpy_hdr.nentries = 0;
9399 copy->cpy_hdr.entries_pageable = TRUE;
9400
9401 vm_map_store_init(&copy->cpy_hdr);
9402
9403 copy->offset = 0;
9404 copy->size = len;
9405
9406 kr = vm_map_remap_extract(src_map,
9407 src_addr,
9408 len,
9409 FALSE, /* copy */
9410 &copy->cpy_hdr,
9411 cur_prot,
9412 max_prot,
9413 VM_INHERIT_SHARE,
9414 TRUE); /* pageable */
9415 if (kr != KERN_SUCCESS) {
9416 vm_map_copy_discard(copy);
9417 return kr;
9418 }
9419
9420 *copy_result = copy;
9421 return KERN_SUCCESS;
9422 }
9423
9424 /*
9425 * vm_map_copyin_object:
9426 *
9427 * Create a copy object from an object.
9428 * Our caller donates an object reference.
9429 */
9430
9431 kern_return_t
9432 vm_map_copyin_object(
9433 vm_object_t object,
9434 vm_object_offset_t offset, /* offset of region in object */
9435 vm_object_size_t size, /* size of region in object */
9436 vm_map_copy_t *copy_result) /* OUT */
9437 {
9438 vm_map_copy_t copy; /* Resulting copy */
9439
9440 /*
9441 * We drop the object into a special copy object
9442 * that contains the object directly.
9443 */
9444
9445 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
9446 copy->type = VM_MAP_COPY_OBJECT;
9447 copy->cpy_object = object;
9448 copy->offset = offset;
9449 copy->size = size;
9450
9451 *copy_result = copy;
9452 return(KERN_SUCCESS);
9453 }
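/*
 * Illustrative sketch (not part of the original source): wrapping a VM
 * object in a copy object and then mapping it.  vm_map_copyin_object()
 * consumes the object reference donated by the caller, and
 * vm_map_copyout() handles the resulting VM_MAP_COPY_OBJECT copy via
 * the special case earlier in this file.  The helper name is
 * hypothetical and error handling is kept minimal.
 */
#if 0	/* example only, not compiled */
static kern_return_t
example_map_anonymous_object(
	vm_map_t		dst_map,
	vm_object_size_t	size,
	vm_map_address_t	*dst_addr)	/* OUT */
{
	vm_object_t	object;
	vm_map_copy_t	copy;
	kern_return_t	kr;

	/* this allocation provides the reference that copyin_object consumes */
	object = vm_object_allocate(size);

	kr = vm_map_copyin_object(object, 0, size, &copy);
	if (kr != KERN_SUCCESS) {
		vm_object_deallocate(object);
		return kr;
	}

	kr = vm_map_copyout(dst_map, dst_addr, copy);
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(copy);
	return kr;
}
#endif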
9454
9455 static void
9456 vm_map_fork_share(
9457 vm_map_t old_map,
9458 vm_map_entry_t old_entry,
9459 vm_map_t new_map)
9460 {
9461 vm_object_t object;
9462 vm_map_entry_t new_entry;
9463
9464 /*
9465 * New sharing code. New map entry
9466 * references original object. Internal
9467 * objects use asynchronous copy algorithm for
9468 * future copies. First make sure we have
9469 * the right object. If we need a shadow,
9470 * or someone else already has one, then
9471 * make a new shadow and share it.
9472 */
9473
9474 object = old_entry->object.vm_object;
9475 if (old_entry->is_sub_map) {
9476 assert(old_entry->wired_count == 0);
9477 #ifndef NO_NESTED_PMAP
9478 if(old_entry->use_pmap) {
9479 kern_return_t result;
9480
9481 result = pmap_nest(new_map->pmap,
9482 (old_entry->object.sub_map)->pmap,
9483 (addr64_t)old_entry->vme_start,
9484 (addr64_t)old_entry->vme_start,
9485 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
9486 if(result)
9487 panic("vm_map_fork_share: pmap_nest failed!");
9488 }
9489 #endif /* NO_NESTED_PMAP */
9490 } else if (object == VM_OBJECT_NULL) {
9491 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
9492 old_entry->vme_start));
9493 old_entry->offset = 0;
9494 old_entry->object.vm_object = object;
9495 old_entry->use_pmap = TRUE;
9496 assert(!old_entry->needs_copy);
9497 } else if (object->copy_strategy !=
9498 MEMORY_OBJECT_COPY_SYMMETRIC) {
9499
9500 /*
9501 * We are already using an asymmetric
9502 * copy, and therefore we already have
9503 * the right object.
9504 */
9505
9506 assert(! old_entry->needs_copy);
9507 }
9508 else if (old_entry->needs_copy || /* case 1 */
9509 object->shadowed || /* case 2 */
9510 (!object->true_share && /* case 3 */
9511 !old_entry->is_shared &&
9512 (object->vo_size >
9513 (vm_map_size_t)(old_entry->vme_end -
9514 old_entry->vme_start)))) {
9515
9516 /*
9517 * We need to create a shadow.
9518 * There are three cases here.
9519 * In the first case, we need to
9520 * complete a deferred symmetrical
9521 * copy that we participated in.
9522 * In the second and third cases,
9523 * we need to create the shadow so
9524 * that changes that we make to the
9525 * object do not interfere with
9526 * any symmetrical copies which
9527 * have occurred (case 2) or which
9528 * might occur (case 3).
9529 *
9530 * The first case is when we had
9531 * deferred shadow object creation
9532 * via the entry->needs_copy mechanism.
9533 * This mechanism only works when
9534 * only one entry points to the source
9535 * object, and we are about to create
9536 * a second entry pointing to the
9537 * same object. The problem is that
9538 * there is no way of mapping from
9539 * an object to the entries pointing
9540 * to it. (Deferred shadow creation
9541 * works with one entry because it occurs
9542 * at fault time, and we walk from the
9543 * entry to the object when handling
9544 * the fault.)
9545 *
9546 * The second case is when the object
9547 * to be shared has already been copied
9548 * with a symmetric copy, but we point
9549 * directly to the object without
9550 * needs_copy set in our entry. (This
9551 * can happen because different ranges
9552 * of an object can be pointed to by
9553 * different entries. In particular,
9554 * a single entry pointing to an object
9555 * can be split by a call to vm_inherit,
9556 * which, combined with task_create, can
9557 * result in the different entries
9558 * having different needs_copy values.)
9559 * The shadowed flag in the object allows
9560 * us to detect this case. The problem
9561 * with this case is that if this object
9562 * has or will have shadows, then we
9563 * must not perform an asymmetric copy
9564 * of this object, since such a copy
9565 * allows the object to be changed, which
9566 * will break the previous symmetrical
9567 * copies (which rely upon the object
9568 * not changing). In a sense, the shadowed
9569 * flag says "don't change this object".
9570 * We fix this by creating a shadow
9571 * object for this object, and sharing
9572 * that. This works because we are free
9573 * to change the shadow object (and thus
9574 * to use an asymmetric copy strategy);
9575 * this is also semantically correct,
9576 * since this object is temporary, and
9577 * therefore a copy of the object is
9578 * as good as the object itself. (This
9579 * is not true for permanent objects,
9580 * since the pager needs to see changes,
9581 * which won't happen if the changes
9582 * are made to a copy.)
9583 *
9584 * The third case is when the object
9585 * to be shared has parts sticking
9586 * outside of the entry we're working
9587 * with, and thus may in the future
9588 * be subject to a symmetrical copy.
9589 * (This is a preemptive version of
9590 * case 2.)
9591 */
9592 vm_object_shadow(&old_entry->object.vm_object,
9593 &old_entry->offset,
9594 (vm_map_size_t) (old_entry->vme_end -
9595 old_entry->vme_start));
9596
9597 /*
9598 * If we're making a shadow for other than
9599 * copy on write reasons, then we have
9600 * to remove write permission.
9601 */
9602
9603 if (!old_entry->needs_copy &&
9604 (old_entry->protection & VM_PROT_WRITE)) {
9605 vm_prot_t prot;
9606
9607 prot = old_entry->protection & ~VM_PROT_WRITE;
9608
9609 if (override_nx(old_map, old_entry->alias) && prot)
9610 prot |= VM_PROT_EXECUTE;
9611
9612 if (old_map->mapped_in_other_pmaps) {
9613 vm_object_pmap_protect(
9614 old_entry->object.vm_object,
9615 old_entry->offset,
9616 (old_entry->vme_end -
9617 old_entry->vme_start),
9618 PMAP_NULL,
9619 old_entry->vme_start,
9620 prot);
9621 } else {
9622 pmap_protect(old_map->pmap,
9623 old_entry->vme_start,
9624 old_entry->vme_end,
9625 prot);
9626 }
9627 }
9628
9629 old_entry->needs_copy = FALSE;
9630 object = old_entry->object.vm_object;
9631 }
9632
9633
9634 /*
9635 * If object was using a symmetric copy strategy,
9636 * change its copy strategy to the default
9637 * asymmetric copy strategy, which is copy_delay
9638 * in the non-norma case and copy_call in the
9639 * norma case. Bump the reference count for the
9640 * new entry.
9641 */
9642
9643 if(old_entry->is_sub_map) {
9644 vm_map_lock(old_entry->object.sub_map);
9645 vm_map_reference(old_entry->object.sub_map);
9646 vm_map_unlock(old_entry->object.sub_map);
9647 } else {
9648 vm_object_lock(object);
9649 vm_object_reference_locked(object);
9650 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
9651 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
9652 }
9653 vm_object_unlock(object);
9654 }
9655
9656 /*
9657 * Clone the entry, using object ref from above.
9658 * Mark both entries as shared.
9659 */
9660
9661 new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
9662 * map or descendants */
9663 vm_map_entry_copy(new_entry, old_entry);
9664 old_entry->is_shared = TRUE;
9665 new_entry->is_shared = TRUE;
9666
9667 /*
9668 * Insert the entry into the new map -- we
9669 * know we're inserting at the end of the new
9670 * map.
9671 */
9672
9673 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
9674
9675 /*
9676 * Update the physical map
9677 */
9678
9679 if (old_entry->is_sub_map) {
9680 /* Bill Angell pmap support goes here */
9681 } else {
9682 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
9683 old_entry->vme_end - old_entry->vme_start,
9684 old_entry->vme_start);
9685 }
9686 }
9687
9688 static boolean_t
9689 vm_map_fork_copy(
9690 vm_map_t old_map,
9691 vm_map_entry_t *old_entry_p,
9692 vm_map_t new_map)
9693 {
9694 vm_map_entry_t old_entry = *old_entry_p;
9695 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
9696 vm_map_offset_t start = old_entry->vme_start;
9697 vm_map_copy_t copy;
9698 vm_map_entry_t last = vm_map_last_entry(new_map);
9699
9700 vm_map_unlock(old_map);
9701 /*
9702 * Use maxprot version of copyin because we
9703 * care about whether this memory can ever
9704 * be accessed, not just whether it's accessible
9705 * right now.
9706 */
9707 if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, &copy)
9708 != KERN_SUCCESS) {
9709 /*
9710 * The map might have changed while it
9711 * was unlocked, check it again. Skip
9712 * any blank space or permanently
9713 * unreadable region.
9714 */
9715 vm_map_lock(old_map);
9716 if (!vm_map_lookup_entry(old_map, start, &last) ||
9717 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
9718 last = last->vme_next;
9719 }
9720 *old_entry_p = last;
9721
9722 /*
9723 * XXX For some error returns, want to
9724 * XXX skip to the next element. Note
9725 * that INVALID_ADDRESS and
9726 * PROTECTION_FAILURE are handled above.
9727 */
9728
9729 return FALSE;
9730 }
9731
9732 /*
9733 * Insert the copy into the new map
9734 */
9735
9736 vm_map_copy_insert(new_map, last, copy);
9737
9738 /*
9739 * Pick up the traversal at the end of
9740 * the copied region.
9741 */
9742
9743 vm_map_lock(old_map);
9744 start += entry_size;
9745 if (! vm_map_lookup_entry(old_map, start, &last)) {
9746 last = last->vme_next;
9747 } else {
9748 if (last->vme_start == start) {
9749 /*
9750 * No need to clip here and we don't
9751 * want to cause any unnecessary
9752 * unnesting...
9753 */
9754 } else {
9755 vm_map_clip_start(old_map, last, start);
9756 }
9757 }
9758 *old_entry_p = last;
9759
9760 return TRUE;
9761 }
9762
9763 /*
9764 * vm_map_fork:
9765 *
9766 * Create and return a new map based on the old
9767 * map, according to the inheritance values on the
9768 * regions in that map.
9769 *
9770 * The source map must not be locked.
9771 */
9772 vm_map_t
9773 vm_map_fork(
9774 ledger_t ledger,
9775 vm_map_t old_map)
9776 {
9777 pmap_t new_pmap;
9778 vm_map_t new_map;
9779 vm_map_entry_t old_entry;
9780 vm_map_size_t new_size = 0, entry_size;
9781 vm_map_entry_t new_entry;
9782 boolean_t src_needs_copy;
9783 boolean_t new_entry_needs_copy;
9784
9785 new_pmap = pmap_create(ledger, (vm_map_size_t) 0,
9786 #if defined(__i386__) || defined(__x86_64__)
9787 old_map->pmap->pm_task_map != TASK_MAP_32BIT
9788 #else
9789 #error Unknown architecture.
9790 #endif
9791 );
9792
9793 vm_map_reference_swap(old_map);
9794 vm_map_lock(old_map);
9795
9796 new_map = vm_map_create(new_pmap,
9797 old_map->min_offset,
9798 old_map->max_offset,
9799 old_map->hdr.entries_pageable);
9800 /* inherit the parent map's page size */
9801 vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
9802 for (
9803 old_entry = vm_map_first_entry(old_map);
9804 old_entry != vm_map_to_entry(old_map);
9805 ) {
9806
9807 entry_size = old_entry->vme_end - old_entry->vme_start;
9808
9809 switch (old_entry->inheritance) {
9810 case VM_INHERIT_NONE:
9811 break;
9812
9813 case VM_INHERIT_SHARE:
9814 vm_map_fork_share(old_map, old_entry, new_map);
9815 new_size += entry_size;
9816 break;
9817
9818 case VM_INHERIT_COPY:
9819
9820 /*
9821 * Inline the copy_quickly case;
9822 * upon failure, fall back on call
9823 * to vm_map_fork_copy.
9824 */
9825
9826 if(old_entry->is_sub_map)
9827 break;
9828 if ((old_entry->wired_count != 0) ||
9829 ((old_entry->object.vm_object != NULL) &&
9830 (old_entry->object.vm_object->true_share))) {
9831 goto slow_vm_map_fork_copy;
9832 }
9833
9834 new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
9835 vm_map_entry_copy(new_entry, old_entry);
9836 if (new_entry->is_sub_map) {
9837 /* clear address space specifics */
9838 new_entry->use_pmap = FALSE;
9839 }
9840
9841 if (! vm_object_copy_quickly(
9842 &new_entry->object.vm_object,
9843 old_entry->offset,
9844 (old_entry->vme_end -
9845 old_entry->vme_start),
9846 &src_needs_copy,
9847 &new_entry_needs_copy)) {
9848 vm_map_entry_dispose(new_map, new_entry);
9849 goto slow_vm_map_fork_copy;
9850 }
9851
9852 /*
9853 * Handle copy-on-write obligations
9854 */
9855
9856 if (src_needs_copy && !old_entry->needs_copy) {
9857 vm_prot_t prot;
9858
9859 prot = old_entry->protection & ~VM_PROT_WRITE;
9860
9861 if (override_nx(old_map, old_entry->alias) && prot)
9862 prot |= VM_PROT_EXECUTE;
9863
9864 vm_object_pmap_protect(
9865 old_entry->object.vm_object,
9866 old_entry->offset,
9867 (old_entry->vme_end -
9868 old_entry->vme_start),
9869 ((old_entry->is_shared
9870 || old_map->mapped_in_other_pmaps)
9871 ? PMAP_NULL :
9872 old_map->pmap),
9873 old_entry->vme_start,
9874 prot);
9875
9876 old_entry->needs_copy = TRUE;
9877 }
9878 new_entry->needs_copy = new_entry_needs_copy;
9879
9880 /*
9881 * Insert the entry at the end
9882 * of the map.
9883 */
9884
9885 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map),
9886 new_entry);
9887 new_size += entry_size;
9888 break;
9889
9890 slow_vm_map_fork_copy:
9891 if (vm_map_fork_copy(old_map, &old_entry, new_map)) {
9892 new_size += entry_size;
9893 }
9894 continue;
9895 }
9896 old_entry = old_entry->vme_next;
9897 }
9898
9899
9900 new_map->size = new_size;
9901 vm_map_unlock(old_map);
9902 vm_map_deallocate(old_map);
9903
9904 return(new_map);
9905 }
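/*
 * Illustrative sketch (not part of the original source): the per-entry
 * inheritance values consulted by vm_map_fork() above are normally set
 * ahead of time with vm_map_inherit().  The helper name is hypothetical
 * and the ledger handling is elided.
 */
#if 0	/* example only, not compiled */
static vm_map_t
example_fork_without_range(
	ledger_t	ledger,
	vm_map_t	parent_map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	/*
	 * Mark [start, end) as VM_INHERIT_NONE so the forked map gets
	 * no entries for that range; VM_INHERIT_SHARE and
	 * VM_INHERIT_COPY regions are handled by vm_map_fork_share()
	 * and vm_map_fork_copy() respectively.
	 */
	(void) vm_map_inherit(parent_map, start, end, VM_INHERIT_NONE);

	return vm_map_fork(ledger, parent_map);
}
#endif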
9906
9907 /*
9908 * vm_map_exec:
9909 *
9910 * Setup the "new_map" with the proper execution environment according
9911 * to the type of executable (platform, 64bit, chroot environment).
9912 * Map the comm page and shared region, etc...
9913 */
9914 kern_return_t
9915 vm_map_exec(
9916 vm_map_t new_map,
9917 task_t task,
9918 void *fsroot,
9919 cpu_type_t cpu)
9920 {
9921 SHARED_REGION_TRACE_DEBUG(
9922 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
9923 (void *)VM_KERNEL_ADDRPERM(current_task()),
9924 (void *)VM_KERNEL_ADDRPERM(new_map),
9925 (void *)VM_KERNEL_ADDRPERM(task),
9926 (void *)VM_KERNEL_ADDRPERM(fsroot),
9927 cpu));
9928 (void) vm_commpage_enter(new_map, task);
9929 (void) vm_shared_region_enter(new_map, task, fsroot, cpu);
9930 SHARED_REGION_TRACE_DEBUG(
9931 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
9932 (void *)VM_KERNEL_ADDRPERM(current_task()),
9933 (void *)VM_KERNEL_ADDRPERM(new_map),
9934 (void *)VM_KERNEL_ADDRPERM(task),
9935 (void *)VM_KERNEL_ADDRPERM(fsroot),
9936 cpu));
9937 return KERN_SUCCESS;
9938 }
9939
9940 /*
9941 * vm_map_lookup_locked:
9942 *
9943 * Finds the VM object, offset, and
9944 * protection for a given virtual address in the
9945 * specified map, assuming a page fault of the
9946 * type specified.
9947 *
9948 * Returns the (object, offset, protection) for
9949 * this address, whether it is wired down, and whether
9950 * this map has the only reference to the data in question.
9951 * In order to later verify this lookup, a "version"
9952 * is returned.
9953 *
9954 * The map MUST be locked by the caller and WILL be
9955 * locked on exit. In order to guarantee the
9956 * existence of the returned object, it is returned
9957 * locked.
9958 *
9959 * If a lookup is requested with "write protection"
9960 * specified, the map may be changed to perform virtual
9961 * copying operations, although the data referenced will
9962 * remain the same.
9963 */
9964 kern_return_t
9965 vm_map_lookup_locked(
9966 vm_map_t *var_map, /* IN/OUT */
9967 vm_map_offset_t vaddr,
9968 vm_prot_t fault_type,
9969 int object_lock_type,
9970 vm_map_version_t *out_version, /* OUT */
9971 vm_object_t *object, /* OUT */
9972 vm_object_offset_t *offset, /* OUT */
9973 vm_prot_t *out_prot, /* OUT */
9974 boolean_t *wired, /* OUT */
9975 vm_object_fault_info_t fault_info, /* OUT */
9976 vm_map_t *real_map)
9977 {
9978 vm_map_entry_t entry;
9979 register vm_map_t map = *var_map;
9980 vm_map_t old_map = *var_map;
9981 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
9982 vm_map_offset_t cow_parent_vaddr = 0;
9983 vm_map_offset_t old_start = 0;
9984 vm_map_offset_t old_end = 0;
9985 register vm_prot_t prot;
9986 boolean_t mask_protections;
9987 boolean_t force_copy;
9988 vm_prot_t original_fault_type;
9989
9990 /*
9991 * VM_PROT_MASK means that the caller wants us to use "fault_type"
9992 * as a mask against the mapping's actual protections, not as an
9993 * absolute value.
9994 */
9995 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
9996 force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
9997 fault_type &= VM_PROT_ALL;
9998 original_fault_type = fault_type;
9999
10000 *real_map = map;
10001
10002 RetryLookup:
10003 fault_type = original_fault_type;
10004
10005 /*
10006 * If the map has an interesting hint, try it before calling
10007 * full blown lookup routine.
10008 */
10009 entry = map->hint;
10010
10011 if ((entry == vm_map_to_entry(map)) ||
10012 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
10013 vm_map_entry_t tmp_entry;
10014
10015 /*
10016 * Entry was either not a valid hint, or the vaddr
10017 * was not contained in the entry, so do a full lookup.
10018 */
10019 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
10020 if((cow_sub_map_parent) && (cow_sub_map_parent != map))
10021 vm_map_unlock(cow_sub_map_parent);
10022 if((*real_map != map)
10023 && (*real_map != cow_sub_map_parent))
10024 vm_map_unlock(*real_map);
10025 return KERN_INVALID_ADDRESS;
10026 }
10027
10028 entry = tmp_entry;
10029 }
10030 if(map == old_map) {
10031 old_start = entry->vme_start;
10032 old_end = entry->vme_end;
10033 }
10034
10035 /*
10036 * Handle submaps. Drop lock on upper map, submap is
10037 * returned locked.
10038 */
10039
10040 submap_recurse:
10041 if (entry->is_sub_map) {
10042 vm_map_offset_t local_vaddr;
10043 vm_map_offset_t end_delta;
10044 vm_map_offset_t start_delta;
10045 vm_map_entry_t submap_entry;
10046 boolean_t mapped_needs_copy=FALSE;
10047
10048 local_vaddr = vaddr;
10049
10050 if ((entry->use_pmap && !(fault_type & VM_PROT_WRITE))) {
10051 /* if real_map equals map we unlock below */
10052 if ((*real_map != map) &&
10053 (*real_map != cow_sub_map_parent))
10054 vm_map_unlock(*real_map);
10055 *real_map = entry->object.sub_map;
10056 }
10057
10058 if(entry->needs_copy && (fault_type & VM_PROT_WRITE)) {
10059 if (!mapped_needs_copy) {
10060 if (vm_map_lock_read_to_write(map)) {
10061 vm_map_lock_read(map);
10062 *real_map = map;
10063 goto RetryLookup;
10064 }
10065 vm_map_lock_read(entry->object.sub_map);
10066 *var_map = entry->object.sub_map;
10067 cow_sub_map_parent = map;
10068 /* reset base to map before cow object */
10069 /* this is the map which will accept */
10070 /* the new cow object */
10071 old_start = entry->vme_start;
10072 old_end = entry->vme_end;
10073 cow_parent_vaddr = vaddr;
10074 mapped_needs_copy = TRUE;
10075 } else {
10076 vm_map_lock_read(entry->object.sub_map);
10077 *var_map = entry->object.sub_map;
10078 if((cow_sub_map_parent != map) &&
10079 (*real_map != map))
10080 vm_map_unlock(map);
10081 }
10082 } else {
10083 vm_map_lock_read(entry->object.sub_map);
10084 *var_map = entry->object.sub_map;
10085 /* leave the map locked if it is the target */
10086 /* COW sub_map above; otherwise, just */
10087 /* follow the maps down to the object. */
10088 /* Here we unlock, knowing we are not */
10089 /* revisiting the map. */
10090 if((*real_map != map) && (map != cow_sub_map_parent))
10091 vm_map_unlock_read(map);
10092 }
10093
10094 map = *var_map;
10095
10096 /* calculate the offset in the submap for vaddr */
10097 local_vaddr = (local_vaddr - entry->vme_start) + entry->offset;
10098
10099 RetrySubMap:
10100 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
10101 if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
10102 vm_map_unlock(cow_sub_map_parent);
10103 }
10104 if((*real_map != map)
10105 && (*real_map != cow_sub_map_parent)) {
10106 vm_map_unlock(*real_map);
10107 }
10108 *real_map = map;
10109 return KERN_INVALID_ADDRESS;
10110 }
10111
10112 /* find the attenuated shadow of the underlying object */
10113 /* on our target map */
10114
10115 /* In plain English: the submap object may extend beyond the */
10116 /* region mapped by the entry, or may only fill a portion */
10117 /* of it. For our purposes, we only care if the object */
10118 /* doesn't fill the entry. In that case, the area which will */
10119 /* ultimately be clipped in the top map only needs */
10120 /* to be as big as the portion of the underlying entry */
10121 /* which is actually mapped. */
10122 start_delta = submap_entry->vme_start > entry->offset ?
10123 submap_entry->vme_start - entry->offset : 0;
10124
10125 end_delta =
10126 (entry->offset + start_delta + (old_end - old_start)) <=
10127 submap_entry->vme_end ?
10128 0 : (entry->offset +
10129 (old_end - old_start))
10130 - submap_entry->vme_end;
10131
10132 old_start += start_delta;
10133 old_end -= end_delta;
10134
10135 if(submap_entry->is_sub_map) {
10136 entry = submap_entry;
10137 vaddr = local_vaddr;
10138 goto submap_recurse;
10139 }
10140
10141 if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) {
10142
10143 vm_object_t sub_object, copy_object;
10144 vm_object_offset_t copy_offset;
10145 vm_map_offset_t local_start;
10146 vm_map_offset_t local_end;
10147 boolean_t copied_slowly = FALSE;
10148
10149 if (vm_map_lock_read_to_write(map)) {
10150 vm_map_lock_read(map);
10151 old_start -= start_delta;
10152 old_end += end_delta;
10153 goto RetrySubMap;
10154 }
10155
10156
10157 sub_object = submap_entry->object.vm_object;
10158 if (sub_object == VM_OBJECT_NULL) {
10159 sub_object =
10160 vm_object_allocate(
10161 (vm_map_size_t)
10162 (submap_entry->vme_end -
10163 submap_entry->vme_start));
10164 submap_entry->object.vm_object = sub_object;
10165 submap_entry->offset = 0;
10166 }
10167 local_start = local_vaddr -
10168 (cow_parent_vaddr - old_start);
10169 local_end = local_vaddr +
10170 (old_end - cow_parent_vaddr);
10171 vm_map_clip_start(map, submap_entry, local_start);
10172 vm_map_clip_end(map, submap_entry, local_end);
10173 if (submap_entry->is_sub_map) {
10174 /* unnesting was done when clipping */
10175 assert(!submap_entry->use_pmap);
10176 }
10177
10178 /* This is the COW case: let's connect */
10179 /* an entry in our space to the underlying */
10180 /* object in the submap, bypassing the */
10181 /* submap. */
10182
10183
10184 if(submap_entry->wired_count != 0 ||
10185 (sub_object->copy_strategy ==
10186 MEMORY_OBJECT_COPY_NONE)) {
10187 vm_object_lock(sub_object);
10188 vm_object_copy_slowly(sub_object,
10189 submap_entry->offset,
10190 (submap_entry->vme_end -
10191 submap_entry->vme_start),
10192 FALSE,
10193 &copy_object);
10194 copied_slowly = TRUE;
10195 } else {
10196
10197 /* set up shadow object */
10198 copy_object = sub_object;
10199 vm_object_reference(copy_object);
10200 sub_object->shadowed = TRUE;
10201 submap_entry->needs_copy = TRUE;
10202
10203 prot = submap_entry->protection & ~VM_PROT_WRITE;
10204
10205 if (override_nx(old_map, submap_entry->alias) && prot)
10206 prot |= VM_PROT_EXECUTE;
10207
10208 vm_object_pmap_protect(
10209 sub_object,
10210 submap_entry->offset,
10211 submap_entry->vme_end -
10212 submap_entry->vme_start,
10213 (submap_entry->is_shared
10214 || map->mapped_in_other_pmaps) ?
10215 PMAP_NULL : map->pmap,
10216 submap_entry->vme_start,
10217 prot);
10218 }
10219
10220 /*
10221 * Adjust the fault offset to the submap entry.
10222 */
10223 copy_offset = (local_vaddr -
10224 submap_entry->vme_start +
10225 submap_entry->offset);
10226
10227 /* This works differently from the */
10228 /* normal submap case. We go back */
10229 /* to the parent of the COW map and */
10230 /* clip out the target portion of */
10231 /* the sub_map, substituting the */
10232 /* new copy object. */
10233
10234 vm_map_unlock(map);
10235 local_start = old_start;
10236 local_end = old_end;
10237 map = cow_sub_map_parent;
10238 *var_map = cow_sub_map_parent;
10239 vaddr = cow_parent_vaddr;
10240 cow_sub_map_parent = NULL;
10241
10242 if(!vm_map_lookup_entry(map,
10243 vaddr, &entry)) {
10244 vm_object_deallocate(
10245 copy_object);
10246 vm_map_lock_write_to_read(map);
10247 return KERN_INVALID_ADDRESS;
10248 }
10249
10250 /* clip out the portion of space */
10251 /* mapped by the sub map which */
10252 /* corresponds to the underlying */
10253 /* object */
10254
10255 /*
10256 * Clip (and unnest) the smallest nested chunk
10257 * possible around the faulting address...
10258 */
10259 local_start = vaddr & ~(pmap_nesting_size_min - 1);
10260 local_end = local_start + pmap_nesting_size_min;
10261 /*
10262 * ... but don't go beyond the "old_start" to "old_end"
10263 * range, to avoid spanning over another VM region
10264 * with a possibly different VM object and/or offset.
10265 */
10266 if (local_start < old_start) {
10267 local_start = old_start;
10268 }
10269 if (local_end > old_end) {
10270 local_end = old_end;
10271 }
10272 /*
10273 * Adjust copy_offset to the start of the range.
10274 */
10275 copy_offset -= (vaddr - local_start);
10276
10277 vm_map_clip_start(map, entry, local_start);
10278 vm_map_clip_end(map, entry, local_end);
10279 if (entry->is_sub_map) {
10280 /* unnesting was done when clipping */
10281 assert(!entry->use_pmap);
10282 }
10283
10284 /* substitute copy object for */
10285 /* shared map entry */
10286 vm_map_deallocate(entry->object.sub_map);
10287 assert(!entry->iokit_acct);
10288 entry->is_sub_map = FALSE;
10289 entry->use_pmap = TRUE;
10290 entry->object.vm_object = copy_object;
10291
10292 /* propagate the submap entry's protections */
10293 entry->protection |= submap_entry->protection;
10294 entry->max_protection |= submap_entry->max_protection;
10295
10296 if(copied_slowly) {
10297 entry->offset = local_start - old_start;
10298 entry->needs_copy = FALSE;
10299 entry->is_shared = FALSE;
10300 } else {
10301 entry->offset = copy_offset;
10302 entry->needs_copy = TRUE;
10303 if(entry->inheritance == VM_INHERIT_SHARE)
10304 entry->inheritance = VM_INHERIT_COPY;
10305 if (map != old_map)
10306 entry->is_shared = TRUE;
10307 }
10308 if(entry->inheritance == VM_INHERIT_SHARE)
10309 entry->inheritance = VM_INHERIT_COPY;
10310
10311 vm_map_lock_write_to_read(map);
10312 } else {
10313 if((cow_sub_map_parent)
10314 && (cow_sub_map_parent != *real_map)
10315 && (cow_sub_map_parent != map)) {
10316 vm_map_unlock(cow_sub_map_parent);
10317 }
10318 entry = submap_entry;
10319 vaddr = local_vaddr;
10320 }
10321 }
10322
10323 /*
10324 * Check whether this task is allowed to have
10325 * this page.
10326 */
10327
10328 prot = entry->protection;
10329
10330 if (override_nx(old_map, entry->alias) && prot) {
10331 /*
10332 * HACK -- if not a stack, then allow execution
10333 */
10334 prot |= VM_PROT_EXECUTE;
10335 }
10336
10337 if (mask_protections) {
10338 fault_type &= prot;
10339 if (fault_type == VM_PROT_NONE) {
10340 goto protection_failure;
10341 }
10342 }
10343 if ((fault_type & (prot)) != fault_type) {
10344 protection_failure:
10345 if (*real_map != map) {
10346 vm_map_unlock(*real_map);
10347 }
10348 *real_map = map;
10349
10350 if ((fault_type & VM_PROT_EXECUTE) && prot)
10351 log_stack_execution_failure((addr64_t)vaddr, prot);
10352
10353 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
10354 return KERN_PROTECTION_FAILURE;
10355 }
10356
10357 /*
10358 * If this page is not pageable, we have to get
10359 * it for all possible accesses.
10360 */
10361
10362 *wired = (entry->wired_count != 0);
10363 if (*wired)
10364 fault_type = prot;
10365
10366 /*
10367 * If the entry is copy-on-write, we either make a copy now or demote access; see below.
10368 */
10369
10370 if (entry->needs_copy) {
10371 /*
10372 * If we want to write the page, we may as well
10373 * handle that now since we've got the map locked.
10374 *
10375 * If we don't need to write the page, we just
10376 * demote the permissions allowed.
10377 */
10378
10379 if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
10380 /*
10381 * Make a new object, and place it in the
10382 * object chain. Note that no new references
10383 * have appeared -- one just moved from the
10384 * map to the new object.
10385 */
10386
10387 if (vm_map_lock_read_to_write(map)) {
10388 vm_map_lock_read(map);
10389 goto RetryLookup;
10390 }
10391 vm_object_shadow(&entry->object.vm_object,
10392 &entry->offset,
10393 (vm_map_size_t) (entry->vme_end -
10394 entry->vme_start));
10395
10396 entry->object.vm_object->shadowed = TRUE;
10397 entry->needs_copy = FALSE;
10398 vm_map_lock_write_to_read(map);
10399 }
10400 else {
10401 /*
10402 * We're attempting to read a copy-on-write
10403 * page -- don't allow writes.
10404 */
10405
10406 prot &= (~VM_PROT_WRITE);
10407 }
10408 }
10409
10410 /*
10411 * Create an object if necessary.
10412 */
10413 if (entry->object.vm_object == VM_OBJECT_NULL) {
10414
10415 if (vm_map_lock_read_to_write(map)) {
10416 vm_map_lock_read(map);
10417 goto RetryLookup;
10418 }
10419
10420 entry->object.vm_object = vm_object_allocate(
10421 (vm_map_size_t)(entry->vme_end - entry->vme_start));
10422 entry->offset = 0;
10423 vm_map_lock_write_to_read(map);
10424 }
10425
10426 /*
10427 * Return the object/offset from this entry. If the entry
10428 * was copy-on-write or empty, it has been fixed up. Also
10429 * return the protection.
10430 */
10431
10432 *offset = (vaddr - entry->vme_start) + entry->offset;
10433 *object = entry->object.vm_object;
10434 *out_prot = prot;
10435
10436 if (fault_info) {
10437 fault_info->interruptible = THREAD_UNINT; /* for now... */
10438 /* ... the caller will change "interruptible" if needed */
10439 fault_info->cluster_size = 0;
10440 fault_info->user_tag = entry->alias;
10441 fault_info->pmap_options = 0;
10442 if (entry->iokit_acct ||
10443 (!entry->is_sub_map && !entry->use_pmap)) {
10444 fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
10445 }
10446 fault_info->behavior = entry->behavior;
10447 fault_info->lo_offset = entry->offset;
10448 fault_info->hi_offset = (entry->vme_end - entry->vme_start) + entry->offset;
10449 fault_info->no_cache = entry->no_cache;
10450 fault_info->stealth = FALSE;
10451 fault_info->io_sync = FALSE;
10452 fault_info->cs_bypass = (entry->used_for_jit)? TRUE : FALSE;
10453 fault_info->mark_zf_absent = FALSE;
10454 fault_info->batch_pmap_op = FALSE;
10455 }
10456
10457 /*
10458 * Lock the object to prevent it from disappearing
10459 */
10460 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
10461 vm_object_lock(*object);
10462 else
10463 vm_object_lock_shared(*object);
10464
10465 /*
10466 * Save the version number
10467 */
10468
10469 out_version->main_timestamp = map->timestamp;
10470
10471 return KERN_SUCCESS;
10472 }
10473
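/*
 * Illustrative sketch only (not part of the build): how a fault-style
 * caller is expected to drive vm_map_lookup_locked().  The helper name
 * "example_lookup" and the choice of VM_PROT_READ / OBJECT_LOCK_EXCLUSIVE
 * are assumptions for the example; see the real fault path for the
 * authoritative usage.
 */
#if 0
static kern_return_t
example_lookup(vm_map_t map, vm_map_offset_t vaddr)
{
	vm_map_t		real_map;
	vm_map_version_t	version;
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_prot_t		prot;
	boolean_t		wired;
	kern_return_t		kr;

	vm_map_lock_read(map);		/* the map MUST be locked by the caller */
	kr = vm_map_lookup_locked(&map, vaddr, VM_PROT_READ,
				  OBJECT_LOCK_EXCLUSIVE, &version,
				  &object, &offset, &prot, &wired,
				  NULL,		/* fault_info is optional */
				  &real_map);
	if (kr != KERN_SUCCESS) {
		vm_map_unlock_read(map);
		return kr;
	}
	/* "object" comes back locked; "map" may now point at a submap
	 * and is still read-locked. */
	vm_object_unlock(object);
	if (real_map != map)
		vm_map_unlock(real_map);
	vm_map_unlock_read(map);
	return KERN_SUCCESS;
}
#endif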
10474
10475 /*
10476 * vm_map_verify:
10477 *
10478 * Verifies that the map in question has not changed
10479 * since the given version. If successful, the map
10480 * will not change until vm_map_verify_done() is called.
10481 */
10482 boolean_t
10483 vm_map_verify(
10484 register vm_map_t map,
10485 register vm_map_version_t *version) /* REF */
10486 {
10487 boolean_t result;
10488
10489 vm_map_lock_read(map);
10490 result = (map->timestamp == version->main_timestamp);
10491
10492 if (!result)
10493 vm_map_unlock_read(map);
10494
10495 return(result);
10496 }
10497
10498 /*
10499 * vm_map_verify_done:
10500 *
10501 * Releases locks acquired by a vm_map_verify.
10502 *
10503 * This is now a macro in vm/vm_map.h. It does a
10504 * vm_map_unlock_read on the map.
10505 */
10506
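/*
 * Illustrative sketch only (assumption, not kernel code): the intended
 * lookup/verify pattern.  A caller saves the "version" returned by
 * vm_map_lookup_locked(), drops the map lock to do slow work, and later
 * uses vm_map_verify()/vm_map_verify_done() to find out whether the map
 * changed in the meantime.  "map" and "version" are placeholders here.
 */
#if 0
	vm_map_version_t	version;	/* filled in by vm_map_lookup_locked() */

	/* ... map lock dropped while slow work is done ... */

	if (vm_map_verify(map, &version)) {
		/* map unchanged: it is read-locked again and the earlier
		 * lookup results can be reused */
		vm_map_verify_done(map, &version);	/* vm_map_unlock_read() */
	} else {
		/* map changed: the lookup must be redone */
	}
#endif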
10507
10508 /*
10509 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
10510 * Goes away after the regular vm_region_recurse function migrates to
10511 * 64 bits.
10512 * vm_region_recurse: a form of vm_region which follows the
10513 * submaps in a target map.
10514 *
10515 */
10516
10517 kern_return_t
10518 vm_map_region_recurse_64(
10519 vm_map_t map,
10520 vm_map_offset_t *address, /* IN/OUT */
10521 vm_map_size_t *size, /* OUT */
10522 natural_t *nesting_depth, /* IN/OUT */
10523 vm_region_submap_info_64_t submap_info, /* IN/OUT */
10524 mach_msg_type_number_t *count) /* IN/OUT */
10525 {
10526 mach_msg_type_number_t original_count;
10527 vm_region_extended_info_data_t extended;
10528 vm_map_entry_t tmp_entry;
10529 vm_map_offset_t user_address;
10530 unsigned int user_max_depth;
10531
10532 /*
10533 * "curr_entry" is the VM map entry preceding or including the
10534 * address we're looking for.
10535 * "curr_map" is the map or sub-map containing "curr_entry".
10536 * "curr_address" is the equivalent of the top map's "user_address"
10537 * in the current map.
10538 * "curr_offset" is the cumulated offset of "curr_map" in the
10539 * target task's address space.
10540 * "curr_depth" is the depth of "curr_map" in the chain of
10541 * sub-maps.
10542 *
10543 * "curr_max_below" and "curr_max_above" limit the range (around
10544 * "curr_address") we should take into account in the current (sub)map.
10545 * They limit the range to what's visible through the map entries
10546 * we've traversed from the top map to the current map.
10547 *
10548 */
10549 vm_map_entry_t curr_entry;
10550 vm_map_address_t curr_address;
10551 vm_map_offset_t curr_offset;
10552 vm_map_t curr_map;
10553 unsigned int curr_depth;
10554 vm_map_offset_t curr_max_below, curr_max_above;
10555 vm_map_offset_t curr_skip;
10556
10557 /*
10558 * "next_" is the same as "curr_" but for the VM region immediately
10559 * after the address we're looking for. We need to keep track of this
10560 * too because we want to return info about that region if the
10561 * address we're looking for is not mapped.
10562 */
10563 vm_map_entry_t next_entry;
10564 vm_map_offset_t next_offset;
10565 vm_map_offset_t next_address;
10566 vm_map_t next_map;
10567 unsigned int next_depth;
10568 vm_map_offset_t next_max_below, next_max_above;
10569 vm_map_offset_t next_skip;
10570
10571 boolean_t look_for_pages;
10572 vm_region_submap_short_info_64_t short_info;
10573
10574 if (map == VM_MAP_NULL) {
10575 /* no address space to work on */
10576 return KERN_INVALID_ARGUMENT;
10577 }
10578
10579
10580 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
10581 /*
10582 * "info" structure is not big enough and
10583 * would overflow
10584 */
10585 return KERN_INVALID_ARGUMENT;
10586 }
10587
10588 original_count = *count;
10589
10590 if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
10591 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
10592 look_for_pages = FALSE;
10593 short_info = (vm_region_submap_short_info_64_t) submap_info;
10594 submap_info = NULL;
10595 } else {
10596 look_for_pages = TRUE;
10597 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
10598 short_info = NULL;
10599
10600 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
10601 *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
10602 }
10603 }
10604
10605 user_address = *address;
10606 user_max_depth = *nesting_depth;
10607
10608 curr_entry = NULL;
10609 curr_map = map;
10610 curr_address = user_address;
10611 curr_offset = 0;
10612 curr_skip = 0;
10613 curr_depth = 0;
10614 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
10615 curr_max_below = curr_address;
10616
10617 next_entry = NULL;
10618 next_map = NULL;
10619 next_address = 0;
10620 next_offset = 0;
10621 next_skip = 0;
10622 next_depth = 0;
10623 next_max_above = (vm_map_offset_t) -1;
10624 next_max_below = (vm_map_offset_t) -1;
10625
10626 if (not_in_kdp) {
10627 vm_map_lock_read(curr_map);
10628 }
10629
10630 for (;;) {
10631 if (vm_map_lookup_entry(curr_map,
10632 curr_address,
10633 &tmp_entry)) {
10634 /* tmp_entry contains the address we're looking for */
10635 curr_entry = tmp_entry;
10636 } else {
10637 vm_map_offset_t skip;
10638 /*
10639 * The address is not mapped. "tmp_entry" is the
10640 * map entry preceding the address. We want the next
10641 * one, if it exists.
10642 */
10643 curr_entry = tmp_entry->vme_next;
10644
10645 if (curr_entry == vm_map_to_entry(curr_map) ||
10646 (curr_entry->vme_start >=
10647 curr_address + curr_max_above)) {
10648 /* no next entry at this level: stop looking */
10649 if (not_in_kdp) {
10650 vm_map_unlock_read(curr_map);
10651 }
10652 curr_entry = NULL;
10653 curr_map = NULL;
10654 curr_offset = 0;
10655 curr_depth = 0;
10656 curr_max_above = 0;
10657 curr_max_below = 0;
10658 break;
10659 }
10660
10661 /* adjust current address and offset */
10662 skip = curr_entry->vme_start - curr_address;
10663 curr_address = curr_entry->vme_start;
10664 curr_skip = skip;
10665 curr_offset += skip;
10666 curr_max_above -= skip;
10667 curr_max_below = 0;
10668 }
10669
10670 /*
10671 * Is the next entry at this level closer to the address (or
10672 * deeper in the submap chain) than the one we had
10673 * so far ?
10674 */
10675 tmp_entry = curr_entry->vme_next;
10676 if (tmp_entry == vm_map_to_entry(curr_map)) {
10677 /* no next entry at this level */
10678 } else if (tmp_entry->vme_start >=
10679 curr_address + curr_max_above) {
10680 /*
10681 * tmp_entry is beyond the scope of what we mapped of
10682 * this submap in the upper level: ignore it.
10683 */
10684 } else if ((next_entry == NULL) ||
10685 (tmp_entry->vme_start + curr_offset <=
10686 next_entry->vme_start + next_offset)) {
10687 /*
10688 * We didn't have a "next_entry" or this one is
10689 * closer to the address we're looking for:
10690 * use this "tmp_entry" as the new "next_entry".
10691 */
10692 if (next_entry != NULL) {
10693 /* unlock the last "next_map" */
10694 if (next_map != curr_map && not_in_kdp) {
10695 vm_map_unlock_read(next_map);
10696 }
10697 }
10698 next_entry = tmp_entry;
10699 next_map = curr_map;
10700 next_depth = curr_depth;
10701 next_address = next_entry->vme_start;
10702 next_skip = curr_skip;
10703 next_offset = curr_offset;
10704 next_offset += (next_address - curr_address);
10705 next_max_above = MIN(next_max_above, curr_max_above);
10706 next_max_above = MIN(next_max_above,
10707 next_entry->vme_end - next_address);
10708 next_max_below = MIN(next_max_below, curr_max_below);
10709 next_max_below = MIN(next_max_below,
10710 next_address - next_entry->vme_start);
10711 }
10712
10713 /*
10714 * "curr_max_{above,below}" allow us to keep track of the
10715 * portion of the submap that is actually mapped at this level:
10716 * the rest of that submap is irrelevant to us, since it's not
10717 * mapped here.
10718 * The relevant portion of the map starts at
10719 * "curr_entry->offset" up to the size of "curr_entry".
10720 */
10721 curr_max_above = MIN(curr_max_above,
10722 curr_entry->vme_end - curr_address);
10723 curr_max_below = MIN(curr_max_below,
10724 curr_address - curr_entry->vme_start);
10725
10726 if (!curr_entry->is_sub_map ||
10727 curr_depth >= user_max_depth) {
10728 /*
10729 * We hit a leaf map or we reached the maximum depth
10730 * we could, so stop looking. Keep the current map
10731 * locked.
10732 */
10733 break;
10734 }
10735
10736 /*
10737 * Get down to the next submap level.
10738 */
10739
10740 /*
10741 * Lock the next level and unlock the current level,
10742 * unless we need to keep it locked to access the "next_entry"
10743 * later.
10744 */
10745 if (not_in_kdp) {
10746 vm_map_lock_read(curr_entry->object.sub_map);
10747 }
10748 if (curr_map == next_map) {
10749 /* keep "next_map" locked in case we need it */
10750 } else {
10751 /* release this map */
10752 if (not_in_kdp)
10753 vm_map_unlock_read(curr_map);
10754 }
10755
10756 /*
10757 * Adjust the offset. "curr_entry" maps the submap
10758 * at relative address "curr_entry->vme_start" in the
10759 * curr_map but skips the first "curr_entry->offset"
10760 * bytes of the submap.
10761 * "curr_offset" always represents the offset of a virtual
10762 * address in the curr_map relative to the absolute address
10763 * space (i.e. the top-level VM map).
10764 */
10765 curr_offset +=
10766 (curr_entry->offset - curr_entry->vme_start);
10767 curr_address = user_address + curr_offset;
10768 /* switch to the submap */
10769 curr_map = curr_entry->object.sub_map;
10770 curr_depth++;
10771 curr_entry = NULL;
10772 }
10773
10774 if (curr_entry == NULL) {
10775 /* no VM region contains the address... */
10776 if (next_entry == NULL) {
10777 /* ... and no VM region follows it either */
10778 return KERN_INVALID_ADDRESS;
10779 }
10780 /* ... gather info about the next VM region */
10781 curr_entry = next_entry;
10782 curr_map = next_map; /* still locked ... */
10783 curr_address = next_address;
10784 curr_skip = next_skip;
10785 curr_offset = next_offset;
10786 curr_depth = next_depth;
10787 curr_max_above = next_max_above;
10788 curr_max_below = next_max_below;
10789 if (curr_map == map) {
10790 user_address = curr_address;
10791 }
10792 } else {
10793 /* we won't need "next_entry" after all */
10794 if (next_entry != NULL) {
10795 /* release "next_map" */
10796 if (next_map != curr_map && not_in_kdp) {
10797 vm_map_unlock_read(next_map);
10798 }
10799 }
10800 }
10801 next_entry = NULL;
10802 next_map = NULL;
10803 next_offset = 0;
10804 next_skip = 0;
10805 next_depth = 0;
10806 next_max_below = -1;
10807 next_max_above = -1;
10808
10809 *nesting_depth = curr_depth;
10810 *size = curr_max_above + curr_max_below;
10811 *address = user_address + curr_skip - curr_max_below;
10812
10813 // LP64todo: all the current tools are 32-bit, so this obviously never worked for 64-bit;
10814 // it should probably be a real 32-bit ID rather than a truncated pointer.
10815 // Current users just check for equality.
10816 #define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p))
10817
10818 if (look_for_pages) {
10819 submap_info->user_tag = curr_entry->alias;
10820 submap_info->offset = curr_entry->offset;
10821 submap_info->protection = curr_entry->protection;
10822 submap_info->inheritance = curr_entry->inheritance;
10823 submap_info->max_protection = curr_entry->max_protection;
10824 submap_info->behavior = curr_entry->behavior;
10825 submap_info->user_wired_count = curr_entry->user_wired_count;
10826 submap_info->is_submap = curr_entry->is_sub_map;
10827 submap_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
10828 } else {
10829 short_info->user_tag = curr_entry->alias;
10830 short_info->offset = curr_entry->offset;
10831 short_info->protection = curr_entry->protection;
10832 short_info->inheritance = curr_entry->inheritance;
10833 short_info->max_protection = curr_entry->max_protection;
10834 short_info->behavior = curr_entry->behavior;
10835 short_info->user_wired_count = curr_entry->user_wired_count;
10836 short_info->is_submap = curr_entry->is_sub_map;
10837 short_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object);
10838 }
10839
10840 extended.pages_resident = 0;
10841 extended.pages_swapped_out = 0;
10842 extended.pages_shared_now_private = 0;
10843 extended.pages_dirtied = 0;
10844 extended.pages_reusable = 0;
10845 extended.external_pager = 0;
10846 extended.shadow_depth = 0;
10847
10848 if (not_in_kdp) {
10849 if (!curr_entry->is_sub_map) {
10850 vm_map_offset_t range_start, range_end;
10851 range_start = MAX((curr_address - curr_max_below),
10852 curr_entry->vme_start);
10853 range_end = MIN((curr_address + curr_max_above),
10854 curr_entry->vme_end);
10855 vm_map_region_walk(curr_map,
10856 range_start,
10857 curr_entry,
10858 (curr_entry->offset +
10859 (range_start -
10860 curr_entry->vme_start)),
10861 range_end - range_start,
10862 &extended,
10863 look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
10864 if (extended.external_pager &&
10865 extended.ref_count == 2 &&
10866 extended.share_mode == SM_SHARED) {
10867 extended.share_mode = SM_PRIVATE;
10868 }
10869 } else {
10870 if (curr_entry->use_pmap) {
10871 extended.share_mode = SM_TRUESHARED;
10872 } else {
10873 extended.share_mode = SM_PRIVATE;
10874 }
10875 extended.ref_count =
10876 curr_entry->object.sub_map->ref_count;
10877 }
10878 }
10879
10880 if (look_for_pages) {
10881 submap_info->pages_resident = extended.pages_resident;
10882 submap_info->pages_swapped_out = extended.pages_swapped_out;
10883 submap_info->pages_shared_now_private =
10884 extended.pages_shared_now_private;
10885 submap_info->pages_dirtied = extended.pages_dirtied;
10886 submap_info->external_pager = extended.external_pager;
10887 submap_info->shadow_depth = extended.shadow_depth;
10888 submap_info->share_mode = extended.share_mode;
10889 submap_info->ref_count = extended.ref_count;
10890
10891 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
10892 submap_info->pages_reusable = extended.pages_reusable;
10893 }
10894 } else {
10895 short_info->external_pager = extended.external_pager;
10896 short_info->shadow_depth = extended.shadow_depth;
10897 short_info->share_mode = extended.share_mode;
10898 short_info->ref_count = extended.ref_count;
10899 }
10900
10901 if (not_in_kdp) {
10902 vm_map_unlock_read(curr_map);
10903 }
10904
10905 return KERN_SUCCESS;
10906 }
10907
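/*
 * Illustrative sketch only (assumption): a minimal in-kernel call of
 * vm_map_region_recurse_64() asking for the full (V1) flavor of submap
 * info.  "map" is a placeholder; this routine is normally reached via
 * the mach_vm_region_recurse() path.
 */
#if 0
	vm_map_offset_t			address = 0;
	vm_map_size_t			size = 0;
	natural_t			depth = 999;	/* descend through all submaps */
	vm_region_submap_info_data_64_t	info;
	mach_msg_type_number_t		count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
	kern_return_t			kr;

	kr = vm_map_region_recurse_64(map, &address, &size, &depth,
				      (vm_region_submap_info_64_t)&info,
				      &count);
	/* On success, [address, address + size) is the region found at or
	 * after the requested address and "depth" is the submap depth. */
#endif
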
10908 /*
10909 * vm_region:
10910 *
10911 * User call to obtain information about a region in
10912 * a task's address map. Currently, only one flavor is
10913 * supported.
10914 *
10915 * XXX The reserved and behavior fields cannot be filled
10916 * in until the vm merge from the IK is completed, and
10917 * vm_reserve is implemented.
10918 */
10919
10920 kern_return_t
10921 vm_map_region(
10922 vm_map_t map,
10923 vm_map_offset_t *address, /* IN/OUT */
10924 vm_map_size_t *size, /* OUT */
10925 vm_region_flavor_t flavor, /* IN */
10926 vm_region_info_t info, /* OUT */
10927 mach_msg_type_number_t *count, /* IN/OUT */
10928 mach_port_t *object_name) /* OUT */
10929 {
10930 vm_map_entry_t tmp_entry;
10931 vm_map_entry_t entry;
10932 vm_map_offset_t start;
10933
10934 if (map == VM_MAP_NULL)
10935 return(KERN_INVALID_ARGUMENT);
10936
10937 switch (flavor) {
10938
10939 case VM_REGION_BASIC_INFO:
10940 /* legacy for old 32-bit objects info */
10941 {
10942 vm_region_basic_info_t basic;
10943
10944 if (*count < VM_REGION_BASIC_INFO_COUNT)
10945 return(KERN_INVALID_ARGUMENT);
10946
10947 basic = (vm_region_basic_info_t) info;
10948 *count = VM_REGION_BASIC_INFO_COUNT;
10949
10950 vm_map_lock_read(map);
10951
10952 start = *address;
10953 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
10954 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
10955 vm_map_unlock_read(map);
10956 return(KERN_INVALID_ADDRESS);
10957 }
10958 } else {
10959 entry = tmp_entry;
10960 }
10961
10962 start = entry->vme_start;
10963
10964 basic->offset = (uint32_t)entry->offset;
10965 basic->protection = entry->protection;
10966 basic->inheritance = entry->inheritance;
10967 basic->max_protection = entry->max_protection;
10968 basic->behavior = entry->behavior;
10969 basic->user_wired_count = entry->user_wired_count;
10970 basic->reserved = entry->is_sub_map;
10971 *address = start;
10972 *size = (entry->vme_end - start);
10973
10974 if (object_name) *object_name = IP_NULL;
10975 if (entry->is_sub_map) {
10976 basic->shared = FALSE;
10977 } else {
10978 basic->shared = entry->is_shared;
10979 }
10980
10981 vm_map_unlock_read(map);
10982 return(KERN_SUCCESS);
10983 }
10984
10985 case VM_REGION_BASIC_INFO_64:
10986 {
10987 vm_region_basic_info_64_t basic;
10988
10989 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
10990 return(KERN_INVALID_ARGUMENT);
10991
10992 basic = (vm_region_basic_info_64_t) info;
10993 *count = VM_REGION_BASIC_INFO_COUNT_64;
10994
10995 vm_map_lock_read(map);
10996
10997 start = *address;
10998 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
10999 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
11000 vm_map_unlock_read(map);
11001 return(KERN_INVALID_ADDRESS);
11002 }
11003 } else {
11004 entry = tmp_entry;
11005 }
11006
11007 start = entry->vme_start;
11008
11009 basic->offset = entry->offset;
11010 basic->protection = entry->protection;
11011 basic->inheritance = entry->inheritance;
11012 basic->max_protection = entry->max_protection;
11013 basic->behavior = entry->behavior;
11014 basic->user_wired_count = entry->user_wired_count;
11015 basic->reserved = entry->is_sub_map;
11016 *address = start;
11017 *size = (entry->vme_end - start);
11018
11019 if (object_name) *object_name = IP_NULL;
11020 if (entry->is_sub_map) {
11021 basic->shared = FALSE;
11022 } else {
11023 basic->shared = entry->is_shared;
11024 }
11025
11026 vm_map_unlock_read(map);
11027 return(KERN_SUCCESS);
11028 }
11029 case VM_REGION_EXTENDED_INFO:
11030 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
11031 return(KERN_INVALID_ARGUMENT);
11032 /*fallthru*/
11033 case VM_REGION_EXTENDED_INFO__legacy:
11034 if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy)
11035 return KERN_INVALID_ARGUMENT;
11036
11037 {
11038 vm_region_extended_info_t extended;
11039 mach_msg_type_number_t original_count;
11040
11041 extended = (vm_region_extended_info_t) info;
11042
11043 vm_map_lock_read(map);
11044
11045 start = *address;
11046 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11047 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
11048 vm_map_unlock_read(map);
11049 return(KERN_INVALID_ADDRESS);
11050 }
11051 } else {
11052 entry = tmp_entry;
11053 }
11054 start = entry->vme_start;
11055
11056 extended->protection = entry->protection;
11057 extended->user_tag = entry->alias;
11058 extended->pages_resident = 0;
11059 extended->pages_swapped_out = 0;
11060 extended->pages_shared_now_private = 0;
11061 extended->pages_dirtied = 0;
11062 extended->external_pager = 0;
11063 extended->shadow_depth = 0;
11064
11065 original_count = *count;
11066 if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
11067 *count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
11068 } else {
11069 extended->pages_reusable = 0;
11070 *count = VM_REGION_EXTENDED_INFO_COUNT;
11071 }
11072
11073 vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended, TRUE, *count);
11074
11075 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
11076 extended->share_mode = SM_PRIVATE;
11077
11078 if (object_name)
11079 *object_name = IP_NULL;
11080 *address = start;
11081 *size = (entry->vme_end - start);
11082
11083 vm_map_unlock_read(map);
11084 return(KERN_SUCCESS);
11085 }
11086 case VM_REGION_TOP_INFO:
11087 {
11088 vm_region_top_info_t top;
11089
11090 if (*count < VM_REGION_TOP_INFO_COUNT)
11091 return(KERN_INVALID_ARGUMENT);
11092
11093 top = (vm_region_top_info_t) info;
11094 *count = VM_REGION_TOP_INFO_COUNT;
11095
11096 vm_map_lock_read(map);
11097
11098 start = *address;
11099 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
11100 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
11101 vm_map_unlock_read(map);
11102 return(KERN_INVALID_ADDRESS);
11103 }
11104 } else {
11105 entry = tmp_entry;
11106
11107 }
11108 start = entry->vme_start;
11109
11110 top->private_pages_resident = 0;
11111 top->shared_pages_resident = 0;
11112
11113 vm_map_region_top_walk(entry, top);
11114
11115 if (object_name)
11116 *object_name = IP_NULL;
11117 *address = start;
11118 *size = (entry->vme_end - start);
11119
11120 vm_map_unlock_read(map);
11121 return(KERN_SUCCESS);
11122 }
11123 default:
11124 return(KERN_INVALID_ARGUMENT);
11125 }
11126 }
11127
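/*
 * Illustrative sketch only (assumption, user-space view): the flavors
 * handled above ultimately service calls such as the following
 * mach_vm_region() request for VM_REGION_BASIC_INFO_64.  "some_pointer"
 * is a placeholder address.
 */
#if 0
	mach_vm_address_t		addr = (mach_vm_address_t)some_pointer;
	mach_vm_size_t			size = 0;
	vm_region_basic_info_data_64_t	info;
	mach_msg_type_number_t		count = VM_REGION_BASIC_INFO_COUNT_64;
	mach_port_t			object_name = MACH_PORT_NULL;
	kern_return_t			kr;

	kr = mach_vm_region(mach_task_self(), &addr, &size,
			    VM_REGION_BASIC_INFO_64,
			    (vm_region_info_t)&info,
			    &count, &object_name);
	/* On success, [addr, addr + size) is the region containing (or
	 * following) some_pointer, with info.protection etc. filled in. */
#endif
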
11128 #define OBJ_RESIDENT_COUNT(obj, entry_size) \
11129 MIN((entry_size), \
11130 ((obj)->all_reusable ? \
11131 (obj)->wired_page_count : \
11132 (obj)->resident_page_count - (obj)->reusable_page_count))
11133
11134 void
11135 vm_map_region_top_walk(
11136 vm_map_entry_t entry,
11137 vm_region_top_info_t top)
11138 {
11139
11140 if (entry->object.vm_object == 0 || entry->is_sub_map) {
11141 top->share_mode = SM_EMPTY;
11142 top->ref_count = 0;
11143 top->obj_id = 0;
11144 return;
11145 }
11146
11147 {
11148 struct vm_object *obj, *tmp_obj;
11149 int ref_count;
11150 uint32_t entry_size;
11151
11152 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
11153
11154 obj = entry->object.vm_object;
11155
11156 vm_object_lock(obj);
11157
11158 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
11159 ref_count--;
11160
11161 assert(obj->reusable_page_count <= obj->resident_page_count);
11162 if (obj->shadow) {
11163 if (ref_count == 1)
11164 top->private_pages_resident =
11165 OBJ_RESIDENT_COUNT(obj, entry_size);
11166 else
11167 top->shared_pages_resident =
11168 OBJ_RESIDENT_COUNT(obj, entry_size);
11169 top->ref_count = ref_count;
11170 top->share_mode = SM_COW;
11171
11172 while ((tmp_obj = obj->shadow)) {
11173 vm_object_lock(tmp_obj);
11174 vm_object_unlock(obj);
11175 obj = tmp_obj;
11176
11177 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
11178 ref_count--;
11179
11180 assert(obj->reusable_page_count <= obj->resident_page_count);
11181 top->shared_pages_resident +=
11182 OBJ_RESIDENT_COUNT(obj, entry_size);
11183 top->ref_count += ref_count - 1;
11184 }
11185 } else {
11186 if (entry->superpage_size) {
11187 top->share_mode = SM_LARGE_PAGE;
11188 top->shared_pages_resident = 0;
11189 top->private_pages_resident = entry_size;
11190 } else if (entry->needs_copy) {
11191 top->share_mode = SM_COW;
11192 top->shared_pages_resident =
11193 OBJ_RESIDENT_COUNT(obj, entry_size);
11194 } else {
11195 if (ref_count == 1 ||
11196 (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
11197 top->share_mode = SM_PRIVATE;
11198 top->private_pages_resident =
11199 OBJ_RESIDENT_COUNT(obj,
11200 entry_size);
11201 } else {
11202 top->share_mode = SM_SHARED;
11203 top->shared_pages_resident =
11204 OBJ_RESIDENT_COUNT(obj,
11205 entry_size);
11206 }
11207 }
11208 top->ref_count = ref_count;
11209 }
11210 /* XXX K64: obj_id will be truncated */
11211 top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
11212
11213 vm_object_unlock(obj);
11214 }
11215 }
11216
11217 void
11218 vm_map_region_walk(
11219 vm_map_t map,
11220 vm_map_offset_t va,
11221 vm_map_entry_t entry,
11222 vm_object_offset_t offset,
11223 vm_object_size_t range,
11224 vm_region_extended_info_t extended,
11225 boolean_t look_for_pages,
11226 mach_msg_type_number_t count)
11227 {
11228 register struct vm_object *obj, *tmp_obj;
11229 register vm_map_offset_t last_offset;
11230 register int i;
11231 register int ref_count;
11232 struct vm_object *shadow_object;
11233 int shadow_depth;
11234
11235 if ((entry->object.vm_object == 0) ||
11236 (entry->is_sub_map) ||
11237 (entry->object.vm_object->phys_contiguous &&
11238 !entry->superpage_size)) {
11239 extended->share_mode = SM_EMPTY;
11240 extended->ref_count = 0;
11241 return;
11242 }
11243
11244 if (entry->superpage_size) {
11245 extended->shadow_depth = 0;
11246 extended->share_mode = SM_LARGE_PAGE;
11247 extended->ref_count = 1;
11248 extended->external_pager = 0;
11249 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
11250 extended->shadow_depth = 0;
11251 return;
11252 }
11253
11254 {
11255 obj = entry->object.vm_object;
11256
11257 vm_object_lock(obj);
11258
11259 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
11260 ref_count--;
11261
11262 if (look_for_pages) {
11263 for (last_offset = offset + range;
11264 offset < last_offset;
11265 offset += PAGE_SIZE_64, va += PAGE_SIZE) {
11266 vm_map_region_look_for_page(map, va, obj,
11267 offset, ref_count,
11268 0, extended, count);
11269 }
11270 } else {
11271 shadow_object = obj->shadow;
11272 shadow_depth = 0;
11273
11274 if ( !(obj->pager_trusted) && !(obj->internal))
11275 extended->external_pager = 1;
11276
11277 if (shadow_object != VM_OBJECT_NULL) {
11278 vm_object_lock(shadow_object);
11279 for (;
11280 shadow_object != VM_OBJECT_NULL;
11281 shadow_depth++) {
11282 vm_object_t next_shadow;
11283
11284 if ( !(shadow_object->pager_trusted) &&
11285 !(shadow_object->internal))
11286 extended->external_pager = 1;
11287
11288 next_shadow = shadow_object->shadow;
11289 if (next_shadow) {
11290 vm_object_lock(next_shadow);
11291 }
11292 vm_object_unlock(shadow_object);
11293 shadow_object = next_shadow;
11294 }
11295 }
11296 extended->shadow_depth = shadow_depth;
11297 }
11298
11299 if (extended->shadow_depth || entry->needs_copy)
11300 extended->share_mode = SM_COW;
11301 else {
11302 if (ref_count == 1)
11303 extended->share_mode = SM_PRIVATE;
11304 else {
11305 if (obj->true_share)
11306 extended->share_mode = SM_TRUESHARED;
11307 else
11308 extended->share_mode = SM_SHARED;
11309 }
11310 }
11311 extended->ref_count = ref_count - extended->shadow_depth;
11312
11313 for (i = 0; i < extended->shadow_depth; i++) {
11314 if ((tmp_obj = obj->shadow) == 0)
11315 break;
11316 vm_object_lock(tmp_obj);
11317 vm_object_unlock(obj);
11318
11319 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
11320 ref_count--;
11321
11322 extended->ref_count += ref_count;
11323 obj = tmp_obj;
11324 }
11325 vm_object_unlock(obj);
11326
11327 if (extended->share_mode == SM_SHARED) {
11328 register vm_map_entry_t cur;
11329 register vm_map_entry_t last;
11330 int my_refs;
11331
11332 obj = entry->object.vm_object;
11333 last = vm_map_to_entry(map);
11334 my_refs = 0;
11335
11336 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
11337 ref_count--;
11338 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
11339 my_refs += vm_map_region_count_obj_refs(cur, obj);
11340
11341 if (my_refs == ref_count)
11342 extended->share_mode = SM_PRIVATE_ALIASED;
11343 else if (my_refs > 1)
11344 extended->share_mode = SM_SHARED_ALIASED;
11345 }
11346 }
11347 }
11348
11349
11350 /* object is locked on entry and locked on return */
11351
11352
11353 static void
11354 vm_map_region_look_for_page(
11355 __unused vm_map_t map,
11356 __unused vm_map_offset_t va,
11357 vm_object_t object,
11358 vm_object_offset_t offset,
11359 int max_refcnt,
11360 int depth,
11361 vm_region_extended_info_t extended,
11362 mach_msg_type_number_t count)
11363 {
11364 register vm_page_t p;
11365 register vm_object_t shadow;
11366 register int ref_count;
11367 vm_object_t caller_object;
11368 kern_return_t kr;
11369 shadow = object->shadow;
11370 caller_object = object;
11371
11372
11373 while (TRUE) {
11374
11375 if ( !(object->pager_trusted) && !(object->internal))
11376 extended->external_pager = 1;
11377
11378 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
11379 if (shadow && (max_refcnt == 1))
11380 extended->pages_shared_now_private++;
11381
11382 if (!p->fictitious &&
11383 (p->dirty || pmap_is_modified(p->phys_page)))
11384 extended->pages_dirtied++;
11385 else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
11386 if (p->reusable || p->object->all_reusable) {
11387 extended->pages_reusable++;
11388 }
11389 }
11390
11391 extended->pages_resident++;
11392
11393 if(object != caller_object)
11394 vm_object_unlock(object);
11395
11396 return;
11397 }
11398 #if MACH_PAGEMAP
11399 if (object->existence_map) {
11400 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) {
11401
11402 extended->pages_swapped_out++;
11403
11404 if(object != caller_object)
11405 vm_object_unlock(object);
11406
11407 return;
11408 }
11409 } else
11410 #endif /* MACH_PAGEMAP */
11411 if (object->internal &&
11412 object->alive &&
11413 !object->terminating &&
11414 object->pager_ready) {
11415
11416 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
11417 if (VM_COMPRESSOR_PAGER_STATE_GET(object,
11418 offset)
11419 == VM_EXTERNAL_STATE_EXISTS) {
11420 /* the pager has that page */
11421 extended->pages_swapped_out++;
11422 if (object != caller_object)
11423 vm_object_unlock(object);
11424 return;
11425 }
11426 } else {
11427 memory_object_t pager;
11428
11429 vm_object_paging_begin(object);
11430 pager = object->pager;
11431 vm_object_unlock(object);
11432
11433 kr = memory_object_data_request(
11434 pager,
11435 offset + object->paging_offset,
11436 0, /* just poke the pager */
11437 VM_PROT_READ,
11438 NULL);
11439
11440 vm_object_lock(object);
11441 vm_object_paging_end(object);
11442
11443 if (kr == KERN_SUCCESS) {
11444 /* the pager has that page */
11445 extended->pages_swapped_out++;
11446 if (object != caller_object)
11447 vm_object_unlock(object);
11448 return;
11449 }
11450 }
11451 }
11452
11453 if (shadow) {
11454 vm_object_lock(shadow);
11455
11456 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
11457 ref_count--;
11458
11459 if (++depth > extended->shadow_depth)
11460 extended->shadow_depth = depth;
11461
11462 if (ref_count > max_refcnt)
11463 max_refcnt = ref_count;
11464
11465 if(object != caller_object)
11466 vm_object_unlock(object);
11467
11468 offset = offset + object->vo_shadow_offset;
11469 object = shadow;
11470 shadow = object->shadow;
11471 continue;
11472 }
11473 if(object != caller_object)
11474 vm_object_unlock(object);
11475 break;
11476 }
11477 }
11478
11479 static int
11480 vm_map_region_count_obj_refs(
11481 vm_map_entry_t entry,
11482 vm_object_t object)
11483 {
11484 register int ref_count;
11485 register vm_object_t chk_obj;
11486 register vm_object_t tmp_obj;
11487
11488 if (entry->object.vm_object == 0)
11489 return(0);
11490
11491 if (entry->is_sub_map)
11492 return(0);
11493 else {
11494 ref_count = 0;
11495
11496 chk_obj = entry->object.vm_object;
11497 vm_object_lock(chk_obj);
11498
11499 while (chk_obj) {
11500 if (chk_obj == object)
11501 ref_count++;
11502 tmp_obj = chk_obj->shadow;
11503 if (tmp_obj)
11504 vm_object_lock(tmp_obj);
11505 vm_object_unlock(chk_obj);
11506
11507 chk_obj = tmp_obj;
11508 }
11509 }
11510 return(ref_count);
11511 }
11512
11513
11514 /*
11515 * Routine: vm_map_simplify
11516 *
11517 * Description:
11518 * Attempt to simplify the map representation in
11519 * the vicinity of the given starting address.
11520 * Note:
11521 * This routine is intended primarily to keep the
11522 * kernel maps more compact -- they generally don't
11523 * benefit from the "expand a map entry" technology
11524 * at allocation time because the adjacent entry
11525 * is often wired down.
11526 */
11527 void
11528 vm_map_simplify_entry(
11529 vm_map_t map,
11530 vm_map_entry_t this_entry)
11531 {
11532 vm_map_entry_t prev_entry;
11533
11534 counter(c_vm_map_simplify_entry_called++);
11535
11536 prev_entry = this_entry->vme_prev;
11537
11538 if ((this_entry != vm_map_to_entry(map)) &&
11539 (prev_entry != vm_map_to_entry(map)) &&
11540
11541 (prev_entry->vme_end == this_entry->vme_start) &&
11542
11543 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
11544 (prev_entry->object.vm_object == this_entry->object.vm_object) &&
11545 ((prev_entry->offset + (prev_entry->vme_end -
11546 prev_entry->vme_start))
11547 == this_entry->offset) &&
11548
11549 (prev_entry->behavior == this_entry->behavior) &&
11550 (prev_entry->needs_copy == this_entry->needs_copy) &&
11551 (prev_entry->protection == this_entry->protection) &&
11552 (prev_entry->max_protection == this_entry->max_protection) &&
11553 (prev_entry->inheritance == this_entry->inheritance) &&
11554 (prev_entry->use_pmap == this_entry->use_pmap) &&
11555 (prev_entry->alias == this_entry->alias) &&
11556 (prev_entry->no_cache == this_entry->no_cache) &&
11557 (prev_entry->permanent == this_entry->permanent) &&
11558 (prev_entry->map_aligned == this_entry->map_aligned) &&
11559 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
11560 (prev_entry->used_for_jit == this_entry->used_for_jit) &&
11561 /* from_reserved_zone: OK if that field doesn't match */
11562 (prev_entry->iokit_acct == this_entry->iokit_acct) &&
11563
11564 (prev_entry->wired_count == this_entry->wired_count) &&
11565 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
11566
11567 (prev_entry->in_transition == FALSE) &&
11568 (this_entry->in_transition == FALSE) &&
11569 (prev_entry->needs_wakeup == FALSE) &&
11570 (this_entry->needs_wakeup == FALSE) &&
11571 (prev_entry->is_shared == FALSE) &&
11572 (this_entry->is_shared == FALSE) &&
11573 (prev_entry->superpage_size == FALSE) &&
11574 (this_entry->superpage_size == FALSE)
11575 ) {
11576 vm_map_store_entry_unlink(map, prev_entry);
11577 assert(prev_entry->vme_start < this_entry->vme_end);
11578 if (prev_entry->map_aligned)
11579 assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
11580 VM_MAP_PAGE_MASK(map)));
11581 this_entry->vme_start = prev_entry->vme_start;
11582 this_entry->offset = prev_entry->offset;
11583 if (prev_entry->is_sub_map) {
11584 vm_map_deallocate(prev_entry->object.sub_map);
11585 } else {
11586 vm_object_deallocate(prev_entry->object.vm_object);
11587 }
11588 vm_map_entry_dispose(map, prev_entry);
11589 SAVE_HINT_MAP_WRITE(map, this_entry);
11590 counter(c_vm_map_simplified++);
11591 }
11592 }
11593
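/*
 * Illustrative example (assumption, for exposition): the kind of layout
 * vm_map_simplify_entry() collapses.  Two adjacent entries backed by the
 * same object at contiguous offsets,
 *
 *	[0x1000, 0x2000) -> object A, offset 0x0
 *	[0x2000, 0x3000) -> object A, offset 0x1000
 *
 * become a single entry [0x1000, 0x3000) -> object A, offset 0x0,
 * provided all of the protection/inheritance/wiring attributes compared
 * above also match.
 */
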
11594 void
11595 vm_map_simplify(
11596 vm_map_t map,
11597 vm_map_offset_t start)
11598 {
11599 vm_map_entry_t this_entry;
11600
11601 vm_map_lock(map);
11602 if (vm_map_lookup_entry(map, start, &this_entry)) {
11603 vm_map_simplify_entry(map, this_entry);
11604 vm_map_simplify_entry(map, this_entry->vme_next);
11605 }
11606 counter(c_vm_map_simplify_called++);
11607 vm_map_unlock(map);
11608 }
11609
11610 static void
11611 vm_map_simplify_range(
11612 vm_map_t map,
11613 vm_map_offset_t start,
11614 vm_map_offset_t end)
11615 {
11616 vm_map_entry_t entry;
11617
11618 /*
11619 * The map should be locked (for "write") by the caller.
11620 */
11621
11622 if (start >= end) {
11623 /* invalid address range */
11624 return;
11625 }
11626
11627 start = vm_map_trunc_page(start,
11628 VM_MAP_PAGE_MASK(map));
11629 end = vm_map_round_page(end,
11630 VM_MAP_PAGE_MASK(map));
11631
11632 if (!vm_map_lookup_entry(map, start, &entry)) {
11633 /* "start" is not mapped and "entry" ends before "start" */
11634 if (entry == vm_map_to_entry(map)) {
11635 /* start with first entry in the map */
11636 entry = vm_map_first_entry(map);
11637 } else {
11638 /* start with next entry */
11639 entry = entry->vme_next;
11640 }
11641 }
11642
11643 while (entry != vm_map_to_entry(map) &&
11644 entry->vme_start <= end) {
11645 /* try and coalesce "entry" with its previous entry */
11646 vm_map_simplify_entry(map, entry);
11647 entry = entry->vme_next;
11648 }
11649 }
11650
11651
11652 /*
11653 * Routine: vm_map_machine_attribute
11654 * Purpose:
11655 * Provide machine-specific attributes to mappings,
11656 * such as cacheability, etc., for machines that provide
11657 * them. NUMA architectures and machines with big/strange
11658 * caches will use this.
11659 * Note:
11660 * Responsibilities for locking and checking are handled here;
11661 * everything else is in the pmap module. If any non-volatile
11662 * information must be kept, the pmap module should handle
11663 * it itself. [This assumes that attributes do not
11664 * need to be inherited, which seems OK to me.]
11665 */
11666 kern_return_t
11667 vm_map_machine_attribute(
11668 vm_map_t map,
11669 vm_map_offset_t start,
11670 vm_map_offset_t end,
11671 vm_machine_attribute_t attribute,
11672 vm_machine_attribute_val_t* value) /* IN/OUT */
11673 {
11674 kern_return_t ret;
11675 vm_map_size_t sync_size;
11676 vm_map_entry_t entry;
11677
11678 if (start < vm_map_min(map) || end > vm_map_max(map))
11679 return KERN_INVALID_ADDRESS;
11680
11681 /* Figure how much memory we need to flush (in page increments) */
11682 sync_size = end - start;
11683
11684 vm_map_lock(map);
11685
11686 if (attribute != MATTR_CACHE) {
11687 /* If we don't have to find physical addresses, we */
11688 /* don't have to do an explicit traversal here. */
11689 ret = pmap_attribute(map->pmap, start, end-start,
11690 attribute, value);
11691 vm_map_unlock(map);
11692 return ret;
11693 }
11694
11695 ret = KERN_SUCCESS; /* Assume it all worked */
11696
11697 while(sync_size) {
11698 if (vm_map_lookup_entry(map, start, &entry)) {
11699 vm_map_size_t sub_size;
11700 if((entry->vme_end - start) > sync_size) {
11701 sub_size = sync_size;
11702 sync_size = 0;
11703 } else {
11704 sub_size = entry->vme_end - start;
11705 sync_size -= sub_size;
11706 }
11707 if(entry->is_sub_map) {
11708 vm_map_offset_t sub_start;
11709 vm_map_offset_t sub_end;
11710
11711 sub_start = (start - entry->vme_start)
11712 + entry->offset;
11713 sub_end = sub_start + sub_size;
11714 vm_map_machine_attribute(
11715 entry->object.sub_map,
11716 sub_start,
11717 sub_end,
11718 attribute, value);
11719 } else {
11720 if(entry->object.vm_object) {
11721 vm_page_t m;
11722 vm_object_t object;
11723 vm_object_t base_object;
11724 vm_object_t last_object;
11725 vm_object_offset_t offset;
11726 vm_object_offset_t base_offset;
11727 vm_map_size_t range;
11728 range = sub_size;
11729 offset = (start - entry->vme_start)
11730 + entry->offset;
11731 base_offset = offset;
11732 object = entry->object.vm_object;
11733 base_object = object;
11734 last_object = NULL;
11735
11736 vm_object_lock(object);
11737
11738 while (range) {
11739 m = vm_page_lookup(
11740 object, offset);
11741
11742 if (m && !m->fictitious) {
11743 ret =
11744 pmap_attribute_cache_sync(
11745 m->phys_page,
11746 PAGE_SIZE,
11747 attribute, value);
11748
11749 } else if (object->shadow) {
11750 offset = offset + object->vo_shadow_offset;
11751 last_object = object;
11752 object = object->shadow;
11753 vm_object_lock(last_object->shadow);
11754 vm_object_unlock(last_object);
11755 continue;
11756 }
11757 range -= PAGE_SIZE;
11758
11759 if (base_object != object) {
11760 vm_object_unlock(object);
11761 vm_object_lock(base_object);
11762 object = base_object;
11763 }
11764 /* Bump to the next page */
11765 base_offset += PAGE_SIZE;
11766 offset = base_offset;
11767 }
11768 vm_object_unlock(object);
11769 }
11770 }
11771 start += sub_size;
11772 } else {
11773 vm_map_unlock(map);
11774 return KERN_FAILURE;
11775 }
11776
11777 }
11778
11779 vm_map_unlock(map);
11780
11781 return ret;
11782 }
11783
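/*
 * Illustrative sketch only (assumption, user-space view): the classic
 * use of this path is flushing caches after generating code, via the
 * MATTR_CACHE attribute.  "code_buffer" and "code_size" are placeholders.
 */
#if 0
	vm_machine_attribute_val_t	value = MATTR_VAL_CACHE_FLUSH;
	kern_return_t			kr;

	kr = vm_machine_attribute(mach_task_self(),
				  (vm_address_t)code_buffer,
				  code_size,
				  MATTR_CACHE,
				  &value);
#endif
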
11784 /*
11785 * vm_map_behavior_set:
11786 *
11787 * Sets the paging reference behavior of the specified address
11788 * range in the target map. Paging reference behavior affects
11789 * how pagein operations resulting from faults on the map will be
11790 * clustered.
11791 */
11792 kern_return_t
11793 vm_map_behavior_set(
11794 vm_map_t map,
11795 vm_map_offset_t start,
11796 vm_map_offset_t end,
11797 vm_behavior_t new_behavior)
11798 {
11799 register vm_map_entry_t entry;
11800 vm_map_entry_t temp_entry;
11801
11802 XPR(XPR_VM_MAP,
11803 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
11804 map, start, end, new_behavior, 0);
11805
11806 if (start > end ||
11807 start < vm_map_min(map) ||
11808 end > vm_map_max(map)) {
11809 return KERN_NO_SPACE;
11810 }
11811
11812 switch (new_behavior) {
11813
11814 /*
11815 * This first block of behaviors all set a persistent state on the specified
11816 * memory range. All we have to do here is to record the desired behavior
11817 * in the vm_map_entry_t's.
11818 */
11819
11820 case VM_BEHAVIOR_DEFAULT:
11821 case VM_BEHAVIOR_RANDOM:
11822 case VM_BEHAVIOR_SEQUENTIAL:
11823 case VM_BEHAVIOR_RSEQNTL:
11824 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
11825 vm_map_lock(map);
11826
11827 /*
11828 * The entire address range must be valid for the map.
11829 * Note that vm_map_range_check() does a
11830 * vm_map_lookup_entry() internally and returns the
11831 * entry containing the start of the address range if
11832 * the entire range is valid.
11833 */
11834 if (vm_map_range_check(map, start, end, &temp_entry)) {
11835 entry = temp_entry;
11836 vm_map_clip_start(map, entry, start);
11837 }
11838 else {
11839 vm_map_unlock(map);
11840 return(KERN_INVALID_ADDRESS);
11841 }
11842
11843 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
11844 vm_map_clip_end(map, entry, end);
11845 if (entry->is_sub_map) {
11846 assert(!entry->use_pmap);
11847 }
11848
11849 if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
11850 entry->zero_wired_pages = TRUE;
11851 } else {
11852 entry->behavior = new_behavior;
11853 }
11854 entry = entry->vme_next;
11855 }
11856
11857 vm_map_unlock(map);
11858 break;
11859
11860 /*
11861 * The rest of these are different from the above in that they cause
11862 * an immediate action to take place as opposed to setting a behavior that
11863 * affects future actions.
11864 */
11865
11866 case VM_BEHAVIOR_WILLNEED:
11867 return vm_map_willneed(map, start, end);
11868
11869 case VM_BEHAVIOR_DONTNEED:
11870 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
11871
11872 case VM_BEHAVIOR_FREE:
11873 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
11874
11875 case VM_BEHAVIOR_REUSABLE:
11876 return vm_map_reusable_pages(map, start, end);
11877
11878 case VM_BEHAVIOR_REUSE:
11879 return vm_map_reuse_pages(map, start, end);
11880
11881 case VM_BEHAVIOR_CAN_REUSE:
11882 return vm_map_can_reuse(map, start, end);
11883
11884 default:
11885 return(KERN_INVALID_ARGUMENT);
11886 }
11887
11888 return(KERN_SUCCESS);
11889 }
11890
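/*
 * Illustrative sketch only (assumption): an in-kernel caller marking a
 * range for sequential read-ahead.  "map", "start" and "end" are
 * placeholders for the example.
 */
#if 0
	kern_return_t	kr;

	kr = vm_map_behavior_set(map, start, end, VM_BEHAVIOR_SEQUENTIAL);
	if (kr != KERN_SUCCESS) {
		/* KERN_NO_SPACE: range outside the map bounds;
		 * KERN_INVALID_ADDRESS: a hole in the requested range */
	}
#endif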
11891
11892 /*
11893 * Internals for madvise(MADV_WILLNEED) system call.
11894 *
11895 * The present implementation does a read-ahead if the mapping corresponds
11896 * to a mapped regular file. If it's an anonymous mapping, then we do nothing
11897 * and basically ignore the "advice" (which we are always free to do).
11898 */
11899
11900
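/*
 * Illustrative sketch only (assumption, user-space view, <sys/mman.h>):
 * the advice handled here originates from a call like the following.
 * "length" and "fd" are placeholders.
 */
#if 0
	void	*buf;

	buf = mmap(NULL, length, PROT_READ, MAP_PRIVATE, fd, 0);
	if (buf != MAP_FAILED) {
		(void) madvise(buf, length, MADV_WILLNEED);	/* request read-ahead */
	}
#endif
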
11901 static kern_return_t
11902 vm_map_willneed(
11903 vm_map_t map,
11904 vm_map_offset_t start,
11905 vm_map_offset_t end
11906 )
11907 {
11908 vm_map_entry_t entry;
11909 vm_object_t object;
11910 memory_object_t pager;
11911 struct vm_object_fault_info fault_info;
11912 kern_return_t kr;
11913 vm_object_size_t len;
11914 vm_object_offset_t offset;
11915
11916 /*
11917 * Fill in static values in fault_info. Several fields get ignored by the code
11918 * we call, but we'll fill them in anyway since uninitialized fields are bad
11919 * when it comes to future backwards compatibility.
11920 */
11921
11922 fault_info.interruptible = THREAD_UNINT; /* ignored value */
11923 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
11924 fault_info.no_cache = FALSE; /* ignored value */
11925 fault_info.stealth = TRUE;
11926 fault_info.io_sync = FALSE;
11927 fault_info.cs_bypass = FALSE;
11928 fault_info.mark_zf_absent = FALSE;
11929 fault_info.batch_pmap_op = FALSE;
11930
11931 /*
11932 * The MADV_WILLNEED operation doesn't require any changes to the
11933 * vm_map_entry_t's, so the read lock is sufficient.
11934 */
11935
11936 vm_map_lock_read(map);
11937
11938 /*
11939 * The madvise semantics require that the address range be fully
11940 * allocated with no holes. Otherwise, we're required to return
11941 * an error.
11942 */
11943
11944 if (! vm_map_range_check(map, start, end, &entry)) {
11945 vm_map_unlock_read(map);
11946 return KERN_INVALID_ADDRESS;
11947 }
11948
11949 /*
11950 * Examine each vm_map_entry_t in the range.
11951 */
11952 for (; entry != vm_map_to_entry(map) && start < end; ) {
11953
11954 /*
11955 * The first time through, the start address could be anywhere
11956 * within the vm_map_entry we found. So adjust the offset to
11957 * correspond. After that, the offset will always be zero to
11958 * correspond to the beginning of the current vm_map_entry.
11959 */
11960 offset = (start - entry->vme_start) + entry->offset;
11961
11962 /*
11963 * Set the length so we don't go beyond the end of the
11964 * map_entry or beyond the end of the range we were given.
11965 * This range could also span multiple map entries, all of which
11966 * map different files, so make sure we only do the right amount
11967 * of I/O for each object. Note that it's possible for there
11968 * to be multiple map entries all referring to the same object
11969 * but with different page permissions, but it's not worth
11970 * trying to optimize that case.
11971 */
11972 len = MIN(entry->vme_end - start, end - start);
11973
11974 if ((vm_size_t) len != len) {
11975 /* 32-bit overflow */
11976 len = (vm_size_t) (0 - PAGE_SIZE);
11977 }
11978 fault_info.cluster_size = (vm_size_t) len;
11979 fault_info.lo_offset = offset;
11980 fault_info.hi_offset = offset + len;
11981 fault_info.user_tag = entry->alias;
11982 fault_info.pmap_options = 0;
11983 if (entry->iokit_acct ||
11984 (!entry->is_sub_map && !entry->use_pmap)) {
11985 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
11986 }
11987
11988 /*
11989 * If there's no read permission to this mapping, then just
11990 * skip it.
11991 */
11992 if ((entry->protection & VM_PROT_READ) == 0) {
11993 entry = entry->vme_next;
11994 start = entry->vme_start;
11995 continue;
11996 }
11997
11998 /*
11999 * Find the file object backing this map entry. If there is
12000 * none, then we simply ignore the "will need" advice for this
12001 * entry and go on to the next one.
12002 */
12003 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
12004 entry = entry->vme_next;
12005 start = entry->vme_start;
12006 continue;
12007 }
12008
12009 /*
12010 * The data_request() could take a long time, so let's
12011 * release the map lock to avoid blocking other threads.
12012 */
12013 vm_map_unlock_read(map);
12014
12015 vm_object_paging_begin(object);
12016 pager = object->pager;
12017 vm_object_unlock(object);
12018
12019 /*
12020 * Get the data from the object asynchronously.
12021 *
12022 * Note that memory_object_data_request() places limits on the
12023 * amount of I/O it will do. Regardless of the len we
12024 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
12025 * silently truncates the len to that size. This isn't
12026 * necessarily bad since madvise shouldn't really be used to
12027 * page in unlimited amounts of data. Other Unix variants
12028 * limit the willneed case as well. If this turns out to be an
12029 * issue for developers, then we can always adjust the policy
12030 * here and still be backwards compatible since this is all
12031 * just "advice".
12032 */
12033 kr = memory_object_data_request(
12034 pager,
12035 offset + object->paging_offset,
12036 0, /* ignored */
12037 VM_PROT_READ,
12038 (memory_object_fault_info_t)&fault_info);
12039
12040 vm_object_lock(object);
12041 vm_object_paging_end(object);
12042 vm_object_unlock(object);
12043
12044 /*
12045 * If we couldn't do the I/O for some reason, just give up on
12046 * the madvise. We still return success to the user since
12047 * madvise isn't supposed to fail when the advice can't be
12048 * taken.
12049 */
12050 if (kr != KERN_SUCCESS) {
12051 return KERN_SUCCESS;
12052 }
12053
12054 start += len;
12055 if (start >= end) {
12056 /* done */
12057 return KERN_SUCCESS;
12058 }
12059
12060 /* look up next entry */
12061 vm_map_lock_read(map);
12062 if (! vm_map_lookup_entry(map, start, &entry)) {
12063 /*
12064 * There's a new hole in the address range.
12065 */
12066 vm_map_unlock_read(map);
12067 return KERN_INVALID_ADDRESS;
12068 }
12069 }
12070
12071 vm_map_unlock_read(map);
12072 return KERN_SUCCESS;
12073 }
12074
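/*
 * A small illustrative sketch of the per-entry windowing computed in the
 * loop above: for the current "start" cursor, the object offset and the
 * I/O length are clipped to both the map entry and the requested range.
 * The helper name and standalone form are hypothetical.
 */
#if 0 /* illustrative only */
static void
example_willneed_window(
	vm_map_entry_t		entry,
	vm_map_offset_t		start,	/* current cursor, >= entry->vme_start */
	vm_map_offset_t		end,	/* end of the advised range */
	vm_object_offset_t	*offset_p,
	vm_object_size_t	*len_p)
{
	/* translate the map address into an offset within the backing object */
	*offset_p = (start - entry->vme_start) + entry->offset;
	/* don't read past the entry or past the advised range */
	*len_p = MIN(entry->vme_end - start, end - start);
}
#endif
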
12075 static boolean_t
12076 vm_map_entry_is_reusable(
12077 vm_map_entry_t entry)
12078 {
12079 vm_object_t object;
12080
12081 switch (entry->alias) {
12082 case VM_MEMORY_MALLOC:
12083 case VM_MEMORY_MALLOC_SMALL:
12084 case VM_MEMORY_MALLOC_LARGE:
12085 case VM_MEMORY_REALLOC:
12086 case VM_MEMORY_MALLOC_TINY:
12087 case VM_MEMORY_MALLOC_LARGE_REUSABLE:
12088 case VM_MEMORY_MALLOC_LARGE_REUSED:
12089 /*
12090 * This is a malloc() memory region: check if it's still
12091 * in its original state and can be re-used for more
12092 * malloc() allocations.
12093 */
12094 break;
12095 default:
12096 /*
12097 * Not a malloc() memory region: let the caller decide if
12098 * it's re-usable.
12099 */
12100 return TRUE;
12101 }
12102
12103 if (entry->is_shared ||
12104 entry->is_sub_map ||
12105 entry->in_transition ||
12106 entry->protection != VM_PROT_DEFAULT ||
12107 entry->max_protection != VM_PROT_ALL ||
12108 entry->inheritance != VM_INHERIT_DEFAULT ||
12109 entry->no_cache ||
12110 entry->permanent ||
12111 entry->superpage_size != FALSE ||
12112 entry->zero_wired_pages ||
12113 entry->wired_count != 0 ||
12114 entry->user_wired_count != 0) {
12115 return FALSE;
12116 }
12117
12118 object = entry->object.vm_object;
12119 if (object == VM_OBJECT_NULL) {
12120 return TRUE;
12121 }
12122 if (
12123 #if 0
12124 /*
12125 * Let's proceed even if the VM object is potentially
12126 * shared.
12127 * We check for this later when processing the actual
12128 * VM pages, so the contents will be safe if shared.
12129 *
12130 * But we can still mark this memory region as "reusable" to
12131 * acknowledge that the caller did let us know that the memory
12132 * could be re-used and should not be penalized for holding
12133 * on to it. This allows its "resident size" to not include
12134 * the reusable range.
12135 */
12136 object->ref_count == 1 &&
12137 #endif
12138 object->wired_page_count == 0 &&
12139 object->copy == VM_OBJECT_NULL &&
12140 object->shadow == VM_OBJECT_NULL &&
12141 object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
12142 object->internal &&
12143 !object->true_share &&
12144 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
12145 !object->code_signed) {
12146 return TRUE;
12147 }
12148 return FALSE;
12149
12150
12151 }
12152
12153 static kern_return_t
12154 vm_map_reuse_pages(
12155 vm_map_t map,
12156 vm_map_offset_t start,
12157 vm_map_offset_t end)
12158 {
12159 vm_map_entry_t entry;
12160 vm_object_t object;
12161 vm_object_offset_t start_offset, end_offset;
12162
12163 /*
12164 * The MADV_REUSE operation doesn't require any changes to the
12165 * vm_map_entry_t's, so the read lock is sufficient.
12166 */
12167
12168 vm_map_lock_read(map);
12169
12170 /*
12171 * The madvise semantics require that the address range be fully
12172 * allocated with no holes. Otherwise, we're required to return
12173 * an error.
12174 */
12175
12176 if (!vm_map_range_check(map, start, end, &entry)) {
12177 vm_map_unlock_read(map);
12178 vm_page_stats_reusable.reuse_pages_failure++;
12179 return KERN_INVALID_ADDRESS;
12180 }
12181
12182 /*
12183 * Examine each vm_map_entry_t in the range.
12184 */
12185 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
12186 entry = entry->vme_next) {
12187 /*
12188 * Sanity check on the VM map entry.
12189 */
12190 if (! vm_map_entry_is_reusable(entry)) {
12191 vm_map_unlock_read(map);
12192 vm_page_stats_reusable.reuse_pages_failure++;
12193 return KERN_INVALID_ADDRESS;
12194 }
12195
12196 /*
12197 * The first time through, the start address could be anywhere
12198 * within the vm_map_entry we found. So adjust the offset to
12199 * correspond.
12200 */
12201 if (entry->vme_start < start) {
12202 start_offset = start - entry->vme_start;
12203 } else {
12204 start_offset = 0;
12205 }
12206 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
12207 start_offset += entry->offset;
12208 end_offset += entry->offset;
12209
12210 object = entry->object.vm_object;
12211 if (object != VM_OBJECT_NULL) {
12212 vm_object_lock(object);
12213 vm_object_reuse_pages(object, start_offset, end_offset,
12214 TRUE);
12215 vm_object_unlock(object);
12216 }
12217
12218 if (entry->alias == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
12219 /*
12220 * XXX
12221 * We do not hold the VM map exclusively here.
12222 * The "alias" field is not that critical, so it's
12223 * safe to update it here, as long as it is the only
12224 * one that can be modified while holding the VM map
12225 * "shared".
12226 */
12227 entry->alias = VM_MEMORY_MALLOC_LARGE_REUSED;
12228 }
12229 }
12230
12231 vm_map_unlock_read(map);
12232 vm_page_stats_reusable.reuse_pages_success++;
12233 return KERN_SUCCESS;
12234 }
12235
12236
12237 static kern_return_t
12238 vm_map_reusable_pages(
12239 vm_map_t map,
12240 vm_map_offset_t start,
12241 vm_map_offset_t end)
12242 {
12243 vm_map_entry_t entry;
12244 vm_object_t object;
12245 vm_object_offset_t start_offset, end_offset;
12246
12247 /*
12248 * The MADV_REUSABLE operation doesn't require any changes to the
12249 * vm_map_entry_t's, so the read lock is sufficient.
12250 */
12251
12252 vm_map_lock_read(map);
12253
12254 /*
12255 * The madvise semantics require that the address range be fully
12256 * allocated with no holes. Otherwise, we're required to return
12257 * an error.
12258 */
12259
12260 if (!vm_map_range_check(map, start, end, &entry)) {
12261 vm_map_unlock_read(map);
12262 vm_page_stats_reusable.reusable_pages_failure++;
12263 return KERN_INVALID_ADDRESS;
12264 }
12265
12266 /*
12267 * Examine each vm_map_entry_t in the range.
12268 */
12269 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
12270 entry = entry->vme_next) {
12271 int kill_pages = 0;
12272
12273 /*
12274 * Sanity check on the VM map entry.
12275 */
12276 if (! vm_map_entry_is_reusable(entry)) {
12277 vm_map_unlock_read(map);
12278 vm_page_stats_reusable.reusable_pages_failure++;
12279 return KERN_INVALID_ADDRESS;
12280 }
12281
12282 /*
12283 * The first time through, the start address could be anywhere
12284 * within the vm_map_entry we found. So adjust the offset to
12285 * correspond.
12286 */
12287 if (entry->vme_start < start) {
12288 start_offset = start - entry->vme_start;
12289 } else {
12290 start_offset = 0;
12291 }
12292 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
12293 start_offset += entry->offset;
12294 end_offset += entry->offset;
12295
12296 object = entry->object.vm_object;
12297 if (object == VM_OBJECT_NULL)
12298 continue;
12299
12300
12301 vm_object_lock(object);
12302 if (object->ref_count == 1 &&
12303 !object->shadow &&
12304 /*
12305 * "iokit_acct" entries are billed for their virtual size
12306 * (rather than for their resident pages only), so they
12307 * wouldn't benefit from making pages reusable, and it
12308 * would be hard to keep track of pages that are both
12309 * "iokit_acct" and "reusable" in the pmap stats and ledgers.
12310 */
12311 !(entry->iokit_acct ||
12312 (!entry->is_sub_map && !entry->use_pmap)))
12313 kill_pages = 1;
12314 else
12315 kill_pages = -1;
12316 if (kill_pages != -1) {
12317 vm_object_deactivate_pages(object,
12318 start_offset,
12319 end_offset - start_offset,
12320 kill_pages,
12321 TRUE /*reusable_pages*/);
12322 } else {
12323 vm_page_stats_reusable.reusable_pages_shared++;
12324 }
12325 vm_object_unlock(object);
12326
12327 if (entry->alias == VM_MEMORY_MALLOC_LARGE ||
12328 entry->alias == VM_MEMORY_MALLOC_LARGE_REUSED) {
12329 /*
12330 * XXX
12331 * We do not hold the VM map exclusively here.
12332 * The "alias" field is not that critical, so it's
12333 * safe to update it here, as long as it is the only
12334 * one that can be modified while holding the VM map
12335 * "shared".
12336 */
12337 entry->alias = VM_MEMORY_MALLOC_LARGE_REUSABLE;
12338 }
12339 }
12340
12341 vm_map_unlock_read(map);
12342 vm_page_stats_reusable.reusable_pages_success++;
12343 return KERN_SUCCESS;
12344 }
12345
12346
12347 static kern_return_t
12348 vm_map_can_reuse(
12349 vm_map_t map,
12350 vm_map_offset_t start,
12351 vm_map_offset_t end)
12352 {
12353 vm_map_entry_t entry;
12354
12355 /*
12356 * The MADV_REUSABLE operation doesn't require any changes to the
12357 * vm_map_entry_t's, so the read lock is sufficient.
12358 */
12359
12360 vm_map_lock_read(map);
12361
12362 /*
12363 * The madvise semantics require that the address range be fully
12364 * allocated with no holes. Otherwise, we're required to return
12365 * an error.
12366 */
12367
12368 if (!vm_map_range_check(map, start, end, &entry)) {
12369 vm_map_unlock_read(map);
12370 vm_page_stats_reusable.can_reuse_failure++;
12371 return KERN_INVALID_ADDRESS;
12372 }
12373
12374 /*
12375 * Examine each vm_map_entry_t in the range.
12376 */
12377 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
12378 entry = entry->vme_next) {
12379 /*
12380 * Sanity check on the VM map entry.
12381 */
12382 if (! vm_map_entry_is_reusable(entry)) {
12383 vm_map_unlock_read(map);
12384 vm_page_stats_reusable.can_reuse_failure++;
12385 return KERN_INVALID_ADDRESS;
12386 }
12387 }
12388
12389 vm_map_unlock_read(map);
12390 vm_page_stats_reusable.can_reuse_success++;
12391 return KERN_SUCCESS;
12392 }
12393
12394
12395 /*
12396 * Routine: vm_map_entry_insert
12397 *
12398 * Description: This routine inserts a new vm_map_entry in a locked map.
12399 */
12400 vm_map_entry_t
12401 vm_map_entry_insert(
12402 vm_map_t map,
12403 vm_map_entry_t insp_entry,
12404 vm_map_offset_t start,
12405 vm_map_offset_t end,
12406 vm_object_t object,
12407 vm_object_offset_t offset,
12408 boolean_t needs_copy,
12409 boolean_t is_shared,
12410 boolean_t in_transition,
12411 vm_prot_t cur_protection,
12412 vm_prot_t max_protection,
12413 vm_behavior_t behavior,
12414 vm_inherit_t inheritance,
12415 unsigned wired_count,
12416 boolean_t no_cache,
12417 boolean_t permanent,
12418 unsigned int superpage_size,
12419 boolean_t clear_map_aligned,
12420 boolean_t is_submap)
12421 {
12422 vm_map_entry_t new_entry;
12423
12424 assert(insp_entry != (vm_map_entry_t)0);
12425
12426 new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
12427
12428 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
12429 new_entry->map_aligned = TRUE;
12430 } else {
12431 new_entry->map_aligned = FALSE;
12432 }
12433 if (clear_map_aligned &&
12434 (! VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
12435 ! VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
12436 new_entry->map_aligned = FALSE;
12437 }
12438
12439 new_entry->vme_start = start;
12440 new_entry->vme_end = end;
12441 assert(page_aligned(new_entry->vme_start));
12442 assert(page_aligned(new_entry->vme_end));
12443 if (new_entry->map_aligned) {
12444 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
12445 VM_MAP_PAGE_MASK(map)));
12446 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
12447 VM_MAP_PAGE_MASK(map)));
12448 }
12449 assert(new_entry->vme_start < new_entry->vme_end);
12450
12451 new_entry->object.vm_object = object;
12452 new_entry->offset = offset;
12453 new_entry->is_shared = is_shared;
12454 new_entry->is_sub_map = is_submap;
12455 new_entry->needs_copy = needs_copy;
12456 new_entry->in_transition = in_transition;
12457 new_entry->needs_wakeup = FALSE;
12458 new_entry->inheritance = inheritance;
12459 new_entry->protection = cur_protection;
12460 new_entry->max_protection = max_protection;
12461 new_entry->behavior = behavior;
12462 new_entry->wired_count = wired_count;
12463 new_entry->user_wired_count = 0;
12464 if (is_submap) {
12465 /*
12466 * submap: "use_pmap" means "nested".
12467 * default: false.
12468 */
12469 new_entry->use_pmap = FALSE;
12470 } else {
12471 /*
12472 * object: "use_pmap" means "use pmap accounting" for footprint.
12473 * default: true.
12474 */
12475 new_entry->use_pmap = TRUE;
12476 }
12477 new_entry->alias = 0;
12478 new_entry->zero_wired_pages = FALSE;
12479 new_entry->no_cache = no_cache;
12480 new_entry->permanent = permanent;
12481 if (superpage_size)
12482 new_entry->superpage_size = TRUE;
12483 else
12484 new_entry->superpage_size = FALSE;
12485 new_entry->used_for_jit = FALSE;
12486 new_entry->iokit_acct = FALSE;
12487
12488 /*
12489 * Insert the new entry into the list.
12490 */
12491
12492 vm_map_store_entry_link(map, insp_entry, new_entry);
12493 map->size += end - start;
12494
12495 /*
12496 * Update the free space hint and the lookup hint.
12497 */
12498
12499 SAVE_HINT_MAP_WRITE(map, new_entry);
12500 return new_entry;
12501 }
12502
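/*
 * A hedged sketch of a typical call, mirroring the parameter list above;
 * the surrounding context (a locked map, an insertion point and a range)
 * is assumed, not taken from an actual call site in this file.
 */
#if 0 /* illustrative only */
	new_entry = vm_map_entry_insert(
		map, insp_entry,		/* locked map, entry to insert after */
		start, end,			/* address range */
		VM_OBJECT_NULL, 0,		/* object, offset: allocate lazily */
		FALSE,				/* needs_copy */
		FALSE,				/* is_shared */
		FALSE,				/* in_transition */
		VM_PROT_DEFAULT,		/* cur_protection */
		VM_PROT_ALL,			/* max_protection */
		VM_BEHAVIOR_DEFAULT,
		VM_INHERIT_DEFAULT,
		0,				/* wired_count */
		FALSE,				/* no_cache */
		FALSE,				/* permanent */
		0,				/* superpage_size */
		FALSE,				/* clear_map_aligned */
		FALSE);				/* is_submap */
#endif
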
12503 /*
12504 * Routine: vm_map_remap_extract
12505 *
12506 * Description: This routine returns a vm_map_entry list from a map.
12507 */
12508 static kern_return_t
12509 vm_map_remap_extract(
12510 vm_map_t map,
12511 vm_map_offset_t addr,
12512 vm_map_size_t size,
12513 boolean_t copy,
12514 struct vm_map_header *map_header,
12515 vm_prot_t *cur_protection,
12516 vm_prot_t *max_protection,
12517 /* What, no behavior? */
12518 vm_inherit_t inheritance,
12519 boolean_t pageable)
12520 {
12521 kern_return_t result;
12522 vm_map_size_t mapped_size;
12523 vm_map_size_t tmp_size;
12524 vm_map_entry_t src_entry; /* result of last map lookup */
12525 vm_map_entry_t new_entry;
12526 vm_object_offset_t offset;
12527 vm_map_offset_t map_address;
12528 vm_map_offset_t src_start; /* start of entry to map */
12529 vm_map_offset_t src_end; /* end of region to be mapped */
12530 vm_object_t object;
12531 vm_map_version_t version;
12532 boolean_t src_needs_copy;
12533 boolean_t new_entry_needs_copy;
12534
12535 assert(map != VM_MAP_NULL);
12536 assert(size != 0);
12537 assert(size == vm_map_round_page(size, PAGE_MASK));
12538 assert(inheritance == VM_INHERIT_NONE ||
12539 inheritance == VM_INHERIT_COPY ||
12540 inheritance == VM_INHERIT_SHARE);
12541
12542 /*
12543 * Compute start and end of region.
12544 */
12545 src_start = vm_map_trunc_page(addr, PAGE_MASK);
12546 src_end = vm_map_round_page(src_start + size, PAGE_MASK);
12547
12548
12549 /*
12550 * Initialize map_header.
12551 */
12552 map_header->links.next = (struct vm_map_entry *)&map_header->links;
12553 map_header->links.prev = (struct vm_map_entry *)&map_header->links;
12554 map_header->nentries = 0;
12555 map_header->entries_pageable = pageable;
12556 map_header->page_shift = PAGE_SHIFT;
12557
12558 vm_map_store_init( map_header );
12559
12560 *cur_protection = VM_PROT_ALL;
12561 *max_protection = VM_PROT_ALL;
12562
12563 map_address = 0;
12564 mapped_size = 0;
12565 result = KERN_SUCCESS;
12566
12567 /*
12568 * The specified source virtual space might correspond to
12569 * multiple map entries, so we need to loop over them.
12570 */
12571 vm_map_lock(map);
12572 while (mapped_size != size) {
12573 vm_map_size_t entry_size;
12574
12575 /*
12576 * Find the beginning of the region.
12577 */
12578 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
12579 result = KERN_INVALID_ADDRESS;
12580 break;
12581 }
12582
12583 if (src_start < src_entry->vme_start ||
12584 (mapped_size && src_start != src_entry->vme_start)) {
12585 result = KERN_INVALID_ADDRESS;
12586 break;
12587 }
12588
12589 tmp_size = size - mapped_size;
12590 if (src_end > src_entry->vme_end)
12591 tmp_size -= (src_end - src_entry->vme_end);
12592
12593 entry_size = (vm_map_size_t)(src_entry->vme_end -
12594 src_entry->vme_start);
12595
12596 if(src_entry->is_sub_map) {
12597 vm_map_reference(src_entry->object.sub_map);
12598 object = VM_OBJECT_NULL;
12599 } else {
12600 object = src_entry->object.vm_object;
12601 if (src_entry->iokit_acct) {
12602 /*
12603 * This entry uses "IOKit accounting".
12604 */
12605 } else if (object != VM_OBJECT_NULL &&
12606 object->purgable != VM_PURGABLE_DENY) {
12607 /*
12608 * Purgeable objects have their own accounting:
12609 * no pmap accounting for them.
12610 */
12611 assert(!src_entry->use_pmap);
12612 } else {
12613 /*
12614 * Not IOKit or purgeable:
12615 * must be accounted by pmap stats.
12616 */
12617 assert(src_entry->use_pmap);
12618 }
12619
12620 if (object == VM_OBJECT_NULL) {
12621 object = vm_object_allocate(entry_size);
12622 src_entry->offset = 0;
12623 src_entry->object.vm_object = object;
12624 } else if (object->copy_strategy !=
12625 MEMORY_OBJECT_COPY_SYMMETRIC) {
12626 /*
12627 * We are already using an asymmetric
12628 * copy, and therefore we already have
12629 * the right object.
12630 */
12631 assert(!src_entry->needs_copy);
12632 } else if (src_entry->needs_copy || object->shadowed ||
12633 (object->internal && !object->true_share &&
12634 !src_entry->is_shared &&
12635 object->vo_size > entry_size)) {
12636
12637 vm_object_shadow(&src_entry->object.vm_object,
12638 &src_entry->offset,
12639 entry_size);
12640
12641 if (!src_entry->needs_copy &&
12642 (src_entry->protection & VM_PROT_WRITE)) {
12643 vm_prot_t prot;
12644
12645 prot = src_entry->protection & ~VM_PROT_WRITE;
12646
12647 if (override_nx(map, src_entry->alias) && prot)
12648 prot |= VM_PROT_EXECUTE;
12649
12650 if(map->mapped_in_other_pmaps) {
12651 vm_object_pmap_protect(
12652 src_entry->object.vm_object,
12653 src_entry->offset,
12654 entry_size,
12655 PMAP_NULL,
12656 src_entry->vme_start,
12657 prot);
12658 } else {
12659 pmap_protect(vm_map_pmap(map),
12660 src_entry->vme_start,
12661 src_entry->vme_end,
12662 prot);
12663 }
12664 }
12665
12666 object = src_entry->object.vm_object;
12667 src_entry->needs_copy = FALSE;
12668 }
12669
12670
12671 vm_object_lock(object);
12672 vm_object_reference_locked(object); /* object ref. for new entry */
12673 if (object->copy_strategy ==
12674 MEMORY_OBJECT_COPY_SYMMETRIC) {
12675 object->copy_strategy =
12676 MEMORY_OBJECT_COPY_DELAY;
12677 }
12678 vm_object_unlock(object);
12679 }
12680
12681 offset = src_entry->offset + (src_start - src_entry->vme_start);
12682
12683 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
12684 vm_map_entry_copy(new_entry, src_entry);
12685 if (new_entry->is_sub_map) {
12686 /* clr address space specifics */
12687 new_entry->use_pmap = FALSE;
12688 }
12689
12690 new_entry->map_aligned = FALSE;
12691
12692 new_entry->vme_start = map_address;
12693 new_entry->vme_end = map_address + tmp_size;
12694 assert(new_entry->vme_start < new_entry->vme_end);
12695 new_entry->inheritance = inheritance;
12696 new_entry->offset = offset;
12697
12698 /*
12699 * The new region has to be copied now if required.
12700 */
12701 RestartCopy:
12702 if (!copy) {
12703 /*
12704 * Cannot allow an entry describing a JIT
12705 * region to be shared across address spaces.
12706 */
12707 if (src_entry->used_for_jit == TRUE) {
12708 result = KERN_INVALID_ARGUMENT;
12709 break;
12710 }
12711 src_entry->is_shared = TRUE;
12712 new_entry->is_shared = TRUE;
12713 if (!(new_entry->is_sub_map))
12714 new_entry->needs_copy = FALSE;
12715
12716 } else if (src_entry->is_sub_map) {
12717 /* make this a COW sub_map if not already */
12718 new_entry->needs_copy = TRUE;
12719 object = VM_OBJECT_NULL;
12720 } else if (src_entry->wired_count == 0 &&
12721 vm_object_copy_quickly(&new_entry->object.vm_object,
12722 new_entry->offset,
12723 (new_entry->vme_end -
12724 new_entry->vme_start),
12725 &src_needs_copy,
12726 &new_entry_needs_copy)) {
12727
12728 new_entry->needs_copy = new_entry_needs_copy;
12729 new_entry->is_shared = FALSE;
12730
12731 /*
12732 * Handle copy_on_write semantics.
12733 */
12734 if (src_needs_copy && !src_entry->needs_copy) {
12735 vm_prot_t prot;
12736
12737 prot = src_entry->protection & ~VM_PROT_WRITE;
12738
12739 if (override_nx(map, src_entry->alias) && prot)
12740 prot |= VM_PROT_EXECUTE;
12741
12742 vm_object_pmap_protect(object,
12743 offset,
12744 entry_size,
12745 ((src_entry->is_shared
12746 || map->mapped_in_other_pmaps) ?
12747 PMAP_NULL : map->pmap),
12748 src_entry->vme_start,
12749 prot);
12750
12751 src_entry->needs_copy = TRUE;
12752 }
12753 /*
12754 * Throw away the old object reference of the new entry.
12755 */
12756 vm_object_deallocate(object);
12757
12758 } else {
12759 new_entry->is_shared = FALSE;
12760
12761 /*
12762 * The map can be safely unlocked since we
12763 * already hold a reference on the object.
12764 *
12765 * Record the timestamp of the map for later
12766 * verification, and unlock the map.
12767 */
12768 version.main_timestamp = map->timestamp;
12769 vm_map_unlock(map); /* Increments timestamp once! */
12770
12771 /*
12772 * Perform the copy.
12773 */
12774 if (src_entry->wired_count > 0) {
12775 vm_object_lock(object);
12776 result = vm_object_copy_slowly(
12777 object,
12778 offset,
12779 entry_size,
12780 THREAD_UNINT,
12781 &new_entry->object.vm_object);
12782
12783 new_entry->offset = 0;
12784 new_entry->needs_copy = FALSE;
12785 } else {
12786 result = vm_object_copy_strategically(
12787 object,
12788 offset,
12789 entry_size,
12790 &new_entry->object.vm_object,
12791 &new_entry->offset,
12792 &new_entry_needs_copy);
12793
12794 new_entry->needs_copy = new_entry_needs_copy;
12795 }
12796
12797 /*
12798 * Throw away the old object reference of the new entry.
12799 */
12800 vm_object_deallocate(object);
12801
12802 if (result != KERN_SUCCESS &&
12803 result != KERN_MEMORY_RESTART_COPY) {
12804 _vm_map_entry_dispose(map_header, new_entry);
12805 break;
12806 }
12807
12808 /*
12809 * Verify that the map has not substantially
12810 * changed while the copy was being made.
12811 */
12812
12813 vm_map_lock(map);
12814 if (version.main_timestamp + 1 != map->timestamp) {
12815 /*
12816 * Simple version comparison failed.
12817 *
12818 * Retry the lookup and verify that the
12819 * same object/offset are still present.
12820 */
12821 vm_object_deallocate(new_entry->
12822 object.vm_object);
12823 _vm_map_entry_dispose(map_header, new_entry);
12824 if (result == KERN_MEMORY_RESTART_COPY)
12825 result = KERN_SUCCESS;
12826 continue;
12827 }
12828
12829 if (result == KERN_MEMORY_RESTART_COPY) {
12830 vm_object_reference(object);
12831 goto RestartCopy;
12832 }
12833 }
12834
12835 _vm_map_store_entry_link(map_header,
12836 map_header->links.prev, new_entry);
12837
12838 /* Protections for submap mapping are irrelevant here */
12839 if( !src_entry->is_sub_map ) {
12840 *cur_protection &= src_entry->protection;
12841 *max_protection &= src_entry->max_protection;
12842 }
12843 map_address += tmp_size;
12844 mapped_size += tmp_size;
12845 src_start += tmp_size;
12846
12847 } /* end while */
12848
12849 vm_map_unlock(map);
12850 if (result != KERN_SUCCESS) {
12851 /*
12852 * Free all allocated elements.
12853 */
12854 for (src_entry = map_header->links.next;
12855 src_entry != (struct vm_map_entry *)&map_header->links;
12856 src_entry = new_entry) {
12857 new_entry = src_entry->vme_next;
12858 _vm_map_store_entry_unlink(map_header, src_entry);
12859 if (src_entry->is_sub_map) {
12860 vm_map_deallocate(src_entry->object.sub_map);
12861 } else {
12862 vm_object_deallocate(src_entry->object.vm_object);
12863 }
12864 _vm_map_entry_dispose(map_header, src_entry);
12865 }
12866 }
12867 return result;
12868 }
12869
12870 /*
12871 * Routine: vm_remap
12872 *
12873 * Map portion of a task's address space.
12874 * Mapped region must not overlap more than
12875 * one vm memory object. Protections and
12876 * inheritance attributes remain the same
12877 * as in the original task and are out parameters.
12878 * Source and target tasks can be identical.
12879 * Other attributes are identical to those of vm_map()
12880 */
12881 kern_return_t
12882 vm_map_remap(
12883 vm_map_t target_map,
12884 vm_map_address_t *address,
12885 vm_map_size_t size,
12886 vm_map_offset_t mask,
12887 int flags,
12888 vm_map_t src_map,
12889 vm_map_offset_t memory_address,
12890 boolean_t copy,
12891 vm_prot_t *cur_protection,
12892 vm_prot_t *max_protection,
12893 vm_inherit_t inheritance)
12894 {
12895 kern_return_t result;
12896 vm_map_entry_t entry;
12897 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
12898 vm_map_entry_t new_entry;
12899 struct vm_map_header map_header;
12900 vm_map_offset_t offset_in_mapping;
12901
12902 if (target_map == VM_MAP_NULL)
12903 return KERN_INVALID_ARGUMENT;
12904
12905 switch (inheritance) {
12906 case VM_INHERIT_NONE:
12907 case VM_INHERIT_COPY:
12908 case VM_INHERIT_SHARE:
12909 if (size != 0 && src_map != VM_MAP_NULL)
12910 break;
12911 /*FALL THRU*/
12912 default:
12913 return KERN_INVALID_ARGUMENT;
12914 }
12915
12916 /*
12917 * If the user is requesting that we return the address of the
12918 * first byte of the data (rather than the base of the page),
12919 * then we use different rounding semantics: specifically,
12920 * we assume that (memory_address, size) describes a region
12921 * all of whose pages we must cover, rather than a base to be truncated
12922 * down and a size to be added to that base. So we figure out
12923 * the highest page that the requested region includes and make
12924 * sure that the size will cover it.
12925 *
12926 * The key example we're worried about is of the form:
12927 *
12928 * memory_address = 0x1ff0, size = 0x20
12929 *
12930 * With the old semantics, we round down the memory_address to 0x1000
12931 * and round up the size to 0x1000, resulting in our covering *only*
12932 * page 0x1000. With the new semantics, we'd realize that the region covers
12933 * 0x1ff0-0x2010, and compute a size of 0x2000. Thus, we cover both page
12934 * 0x1000 and page 0x2000 in the region we remap.
12935 */
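/*
 * Worked arithmetic for the example above, assuming 4K pages
 * (PAGE_MASK == 0xFFF):
 *   trunc(0x1ff0)                    = 0x1000
 *   offset_in_mapping                = 0x1ff0 - 0x1000 = 0x0ff0
 *   round(0x1ff0 + 0x20 - 0x1000)    = round(0x1010)   = 0x2000
 * so the remapped region starts at 0x1000 with size 0x2000 and covers
 * both page 0x1000 and page 0x2000, as intended.
 */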
12936 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
12937 offset_in_mapping = memory_address - vm_map_trunc_page(memory_address, PAGE_MASK);
12938 size = vm_map_round_page(memory_address + size - vm_map_trunc_page(memory_address, PAGE_MASK), PAGE_MASK);
12939 } else {
12940 size = vm_map_round_page(size, PAGE_MASK);
12941 }
12942
12943 result = vm_map_remap_extract(src_map, memory_address,
12944 size, copy, &map_header,
12945 cur_protection,
12946 max_protection,
12947 inheritance,
12948 target_map->hdr.entries_pageable);
12949
12950 if (result != KERN_SUCCESS) {
12951 return result;
12952 }
12953
12954 /*
12955 * Allocate/check a range of free virtual address
12956 * space for the target
12957 */
12958 *address = vm_map_trunc_page(*address,
12959 VM_MAP_PAGE_MASK(target_map));
12960 vm_map_lock(target_map);
12961 result = vm_map_remap_range_allocate(target_map, address, size,
12962 mask, flags, &insp_entry);
12963
12964 for (entry = map_header.links.next;
12965 entry != (struct vm_map_entry *)&map_header.links;
12966 entry = new_entry) {
12967 new_entry = entry->vme_next;
12968 _vm_map_store_entry_unlink(&map_header, entry);
12969 if (result == KERN_SUCCESS) {
12970 entry->vme_start += *address;
12971 entry->vme_end += *address;
12972 assert(!entry->map_aligned);
12973 vm_map_store_entry_link(target_map, insp_entry, entry);
12974 insp_entry = entry;
12975 } else {
12976 if (!entry->is_sub_map) {
12977 vm_object_deallocate(entry->object.vm_object);
12978 } else {
12979 vm_map_deallocate(entry->object.sub_map);
12980 }
12981 _vm_map_entry_dispose(&map_header, entry);
12982 }
12983 }
12984
12985 if( target_map->disable_vmentry_reuse == TRUE) {
12986 if( target_map->highest_entry_end < insp_entry->vme_end ){
12987 target_map->highest_entry_end = insp_entry->vme_end;
12988 }
12989 }
12990
12991 if (result == KERN_SUCCESS) {
12992 target_map->size += size;
12993 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
12994 }
12995 vm_map_unlock(target_map);
12996
12997 if (result == KERN_SUCCESS && target_map->wiring_required)
12998 result = vm_map_wire(target_map, *address,
12999 *address + size, *cur_protection, TRUE);
13000
13001 /*
13002 * If requested, return the address of the data pointed to by the
13003 * request, rather than the base of the resulting page.
13004 */
13005 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
13006 *address += offset_in_mapping;
13007 }
13008
13009 return result;
13010 }
13011
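/*
 * A minimal usage sketch for vm_map_remap(): share "size" bytes starting
 * at "src_addr" in "src_map" into "target_map" at an address chosen by
 * the kernel. The wrapper name is hypothetical; error handling is elided.
 */
#if 0 /* illustrative only */
static kern_return_t
example_share_range(
	vm_map_t		target_map,
	vm_map_t		src_map,
	vm_map_offset_t		src_addr,
	vm_map_size_t		size,
	vm_map_address_t	*target_addr)	/* out */
{
	vm_prot_t	cur_prot, max_prot;

	*target_addr = 0;
	return vm_map_remap(target_map,
			    target_addr,	/* in/out: chosen address */
			    size,
			    0,			/* mask: no alignment constraint */
			    VM_FLAGS_ANYWHERE,
			    src_map,
			    src_addr,		/* memory_address in src_map */
			    FALSE,		/* copy: share, don't copy */
			    &cur_prot,		/* out: resulting protections */
			    &max_prot,
			    VM_INHERIT_SHARE);
}
#endif
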
13012 /*
13013 * Routine: vm_map_remap_range_allocate
13014 *
13015 * Description:
13016 * Allocate a range in the specified virtual address map.
13017 * Returns the address and the map entry just before the allocated
13018 * range.
13019 *
13020 * Map must be locked.
13021 */
13022
13023 static kern_return_t
13024 vm_map_remap_range_allocate(
13025 vm_map_t map,
13026 vm_map_address_t *address, /* IN/OUT */
13027 vm_map_size_t size,
13028 vm_map_offset_t mask,
13029 int flags,
13030 vm_map_entry_t *map_entry) /* OUT */
13031 {
13032 vm_map_entry_t entry;
13033 vm_map_offset_t start;
13034 vm_map_offset_t end;
13035 kern_return_t kr;
13036
13037 StartAgain: ;
13038
13039 start = *address;
13040
13041 if (flags & VM_FLAGS_ANYWHERE)
13042 {
13043 /*
13044 * Calculate the first possible address.
13045 */
13046
13047 if (start < map->min_offset)
13048 start = map->min_offset;
13049 if (start > map->max_offset)
13050 return(KERN_NO_SPACE);
13051
13052 /*
13053 * Look for the first possible address;
13054 * if there's already something at this
13055 * address, we have to start after it.
13056 */
13057
13058 if( map->disable_vmentry_reuse == TRUE) {
13059 VM_MAP_HIGHEST_ENTRY(map, entry, start);
13060 } else {
13061 assert(first_free_is_valid(map));
13062 if (start == map->min_offset) {
13063 if ((entry = map->first_free) != vm_map_to_entry(map))
13064 start = entry->vme_end;
13065 } else {
13066 vm_map_entry_t tmp_entry;
13067 if (vm_map_lookup_entry(map, start, &tmp_entry))
13068 start = tmp_entry->vme_end;
13069 entry = tmp_entry;
13070 }
13071 start = vm_map_round_page(start,
13072 VM_MAP_PAGE_MASK(map));
13073 }
13074
13075 /*
13076 * In any case, the "entry" always precedes
13077 * the proposed new region throughout the
13078 * loop:
13079 */
13080
13081 while (TRUE) {
13082 register vm_map_entry_t next;
13083
13084 /*
13085 * Find the end of the proposed new region.
13086 * Be sure we didn't go beyond the end, or
13087 * wrap around the address.
13088 */
13089
13090 end = ((start + mask) & ~mask);
13091 end = vm_map_round_page(end,
13092 VM_MAP_PAGE_MASK(map));
13093 if (end < start)
13094 return(KERN_NO_SPACE);
13095 start = end;
13096 end += size;
13097
13098 if ((end > map->max_offset) || (end < start)) {
13099 if (map->wait_for_space) {
13100 if (size <= (map->max_offset -
13101 map->min_offset)) {
13102 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
13103 vm_map_unlock(map);
13104 thread_block(THREAD_CONTINUE_NULL);
13105 vm_map_lock(map);
13106 goto StartAgain;
13107 }
13108 }
13109
13110 return(KERN_NO_SPACE);
13111 }
13112
13113 /*
13114 * If there are no more entries, we must win.
13115 */
13116
13117 next = entry->vme_next;
13118 if (next == vm_map_to_entry(map))
13119 break;
13120
13121 /*
13122 * If there is another entry, it must be
13123 * after the end of the potential new region.
13124 */
13125
13126 if (next->vme_start >= end)
13127 break;
13128
13129 /*
13130 * Didn't fit -- move to the next entry.
13131 */
13132
13133 entry = next;
13134 start = entry->vme_end;
13135 }
13136 *address = start;
13137 } else {
13138 vm_map_entry_t temp_entry;
13139
13140 /*
13141 * Verify that:
13142 * the address doesn't itself violate
13143 * the mask requirement.
13144 */
13145
13146 if ((start & mask) != 0)
13147 return(KERN_NO_SPACE);
13148
13149
13150 /*
13151 * ... the address is within bounds
13152 */
13153
13154 end = start + size;
13155
13156 if ((start < map->min_offset) ||
13157 (end > map->max_offset) ||
13158 (start >= end)) {
13159 return(KERN_INVALID_ADDRESS);
13160 }
13161
13162 /*
13163 * If we're asked to overwrite whatever was mapped in that
13164 * range, first deallocate that range.
13165 */
13166 if (flags & VM_FLAGS_OVERWRITE) {
13167 vm_map_t zap_map;
13168
13169 /*
13170 * We use a "zap_map" to avoid having to unlock
13171 * the "map" in vm_map_delete(), which would compromise
13172 * the atomicity of the "deallocate" and then "remap"
13173 * combination.
13174 */
13175 zap_map = vm_map_create(PMAP_NULL,
13176 start,
13177 end,
13178 map->hdr.entries_pageable);
13179 if (zap_map == VM_MAP_NULL) {
13180 return KERN_RESOURCE_SHORTAGE;
13181 }
13182 vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
13183
13184 kr = vm_map_delete(map, start, end,
13185 (VM_MAP_REMOVE_SAVE_ENTRIES |
13186 VM_MAP_REMOVE_NO_MAP_ALIGN),
13187 zap_map);
13188 if (kr == KERN_SUCCESS) {
13189 vm_map_destroy(zap_map,
13190 VM_MAP_REMOVE_NO_PMAP_CLEANUP);
13191 zap_map = VM_MAP_NULL;
13192 }
13193 }
13194
13195 /*
13196 * ... the starting address isn't allocated
13197 */
13198
13199 if (vm_map_lookup_entry(map, start, &temp_entry))
13200 return(KERN_NO_SPACE);
13201
13202 entry = temp_entry;
13203
13204 /*
13205 * ... the next region doesn't overlap the
13206 * end point.
13207 */
13208
13209 if ((entry->vme_next != vm_map_to_entry(map)) &&
13210 (entry->vme_next->vme_start < end))
13211 return(KERN_NO_SPACE);
13212 }
13213 *map_entry = entry;
13214 return(KERN_SUCCESS);
13215 }
13216
13217 /*
13218 * vm_map_switch:
13219 *
13220 * Set the address map for the current thread to the specified map
13221 */
13222
13223 vm_map_t
13224 vm_map_switch(
13225 vm_map_t map)
13226 {
13227 int mycpu;
13228 thread_t thread = current_thread();
13229 vm_map_t oldmap = thread->map;
13230
13231 mp_disable_preemption();
13232 mycpu = cpu_number();
13233
13234 /*
13235 * Deactivate the current map and activate the requested map
13236 */
13237 PMAP_SWITCH_USER(thread, map, mycpu);
13238
13239 mp_enable_preemption();
13240 return(oldmap);
13241 }
13242
13243
13244 /*
13245 * Routine: vm_map_write_user
13246 *
13247 * Description:
13248 * Copy out data from kernel space into space in the
13249 * destination map. The space must already exist in the
13250 * destination map.
13251 * NOTE: This routine should only be called by threads
13252 * which can block on a page fault, i.e. kernel-mode user
13253 * threads.
13254 *
13255 */
13256 kern_return_t
13257 vm_map_write_user(
13258 vm_map_t map,
13259 void *src_p,
13260 vm_map_address_t dst_addr,
13261 vm_size_t size)
13262 {
13263 kern_return_t kr = KERN_SUCCESS;
13264
13265 if(current_map() == map) {
13266 if (copyout(src_p, dst_addr, size)) {
13267 kr = KERN_INVALID_ADDRESS;
13268 }
13269 } else {
13270 vm_map_t oldmap;
13271
13272 /* take on the identity of the target map while doing */
13273 /* the transfer */
13274
13275 vm_map_reference(map);
13276 oldmap = vm_map_switch(map);
13277 if (copyout(src_p, dst_addr, size)) {
13278 kr = KERN_INVALID_ADDRESS;
13279 }
13280 vm_map_switch(oldmap);
13281 vm_map_deallocate(map);
13282 }
13283 return kr;
13284 }
13285
13286 /*
13287 * Routine: vm_map_read_user
13288 *
13289 * Description:
13290 * Copy in data from a user space source map into the
13291 * kernel map. The space must already exist in the
13292 * kernel map.
13293 * NOTE: This routine should only be called by threads
13294 * which can block on a page fault, i.e. kernel-mode user
13295 * threads.
13296 *
13297 */
13298 kern_return_t
13299 vm_map_read_user(
13300 vm_map_t map,
13301 vm_map_address_t src_addr,
13302 void *dst_p,
13303 vm_size_t size)
13304 {
13305 kern_return_t kr = KERN_SUCCESS;
13306
13307 if(current_map() == map) {
13308 if (copyin(src_addr, dst_p, size)) {
13309 kr = KERN_INVALID_ADDRESS;
13310 }
13311 } else {
13312 vm_map_t oldmap;
13313
13314 /* take on the identity of the target map while doing */
13315 /* the transfer */
13316
13317 vm_map_reference(map);
13318 oldmap = vm_map_switch(map);
13319 if (copyin(src_addr, dst_p, size)) {
13320 kr = KERN_INVALID_ADDRESS;
13321 }
13322 vm_map_switch(oldmap);
13323 vm_map_deallocate(map);
13324 }
13325 return kr;
13326 }
13327
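/*
 * A hedged sketch of the two helpers above: copy a small kernel value out
 * to a user map and read it back. The wrapper name is hypothetical and
 * "uaddr" is assumed to describe an existing, writable user mapping.
 */
#if 0 /* illustrative only */
static kern_return_t
example_user_roundtrip(
	vm_map_t		umap,
	vm_map_address_t	uaddr)
{
	uint32_t	value = 0x12345678;
	kern_return_t	kr;

	/* kernel -> user */
	kr = vm_map_write_user(umap, &value, uaddr, sizeof (value));
	if (kr != KERN_SUCCESS)
		return kr;
	/* user -> kernel */
	return vm_map_read_user(umap, uaddr, &value, sizeof (value));
}
#endif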
13328
13329 /*
13330 * vm_map_check_protection:
13331 *
13332 * Assert that the target map allows the specified
13333 * privilege on the entire address region given.
13334 * The entire region must be allocated.
13335 */
13336 boolean_t
13337 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
13338 vm_map_offset_t end, vm_prot_t protection)
13339 {
13340 vm_map_entry_t entry;
13341 vm_map_entry_t tmp_entry;
13342
13343 vm_map_lock(map);
13344
13345 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
13346 {
13347 vm_map_unlock(map);
13348 return (FALSE);
13349 }
13350
13351 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13352 vm_map_unlock(map);
13353 return(FALSE);
13354 }
13355
13356 entry = tmp_entry;
13357
13358 while (start < end) {
13359 if (entry == vm_map_to_entry(map)) {
13360 vm_map_unlock(map);
13361 return(FALSE);
13362 }
13363
13364 /*
13365 * No holes allowed!
13366 */
13367
13368 if (start < entry->vme_start) {
13369 vm_map_unlock(map);
13370 return(FALSE);
13371 }
13372
13373 /*
13374 * Check protection associated with entry.
13375 */
13376
13377 if ((entry->protection & protection) != protection) {
13378 vm_map_unlock(map);
13379 return(FALSE);
13380 }
13381
13382 /* go to next entry */
13383
13384 start = entry->vme_end;
13385 entry = entry->vme_next;
13386 }
13387 vm_map_unlock(map);
13388 return(TRUE);
13389 }
13390
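/*
 * A minimal sketch of vm_map_check_protection(): assert that an entire
 * range is both readable and writable before touching it. The surrounding
 * wrapper is hypothetical.
 */
#if 0 /* illustrative only */
static kern_return_t
example_require_rw(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	if (!vm_map_check_protection(map, start, end,
				     VM_PROT_READ | VM_PROT_WRITE))
		return KERN_PROTECTION_FAILURE;
	return KERN_SUCCESS;
}
#endif
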
13391 kern_return_t
13392 vm_map_purgable_control(
13393 vm_map_t map,
13394 vm_map_offset_t address,
13395 vm_purgable_t control,
13396 int *state)
13397 {
13398 vm_map_entry_t entry;
13399 vm_object_t object;
13400 kern_return_t kr;
13401 boolean_t was_nonvolatile;
13402
13403 /*
13404 * Vet all the input parameters and current type and state of the
13405 * underlying object. Return with an error if anything is amiss.
13406 */
13407 if (map == VM_MAP_NULL)
13408 return(KERN_INVALID_ARGUMENT);
13409
13410 if (control != VM_PURGABLE_SET_STATE &&
13411 control != VM_PURGABLE_GET_STATE &&
13412 control != VM_PURGABLE_PURGE_ALL)
13413 return(KERN_INVALID_ARGUMENT);
13414
13415 if (control == VM_PURGABLE_PURGE_ALL) {
13416 vm_purgeable_object_purge_all();
13417 return KERN_SUCCESS;
13418 }
13419
13420 if (control == VM_PURGABLE_SET_STATE &&
13421 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
13422 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)))
13423 return(KERN_INVALID_ARGUMENT);
13424
13425 vm_map_lock_read(map);
13426
13427 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
13428
13429 /*
13430 * Must pass a valid non-submap address.
13431 */
13432 vm_map_unlock_read(map);
13433 return(KERN_INVALID_ADDRESS);
13434 }
13435
13436 if ((entry->protection & VM_PROT_WRITE) == 0) {
13437 /*
13438 * Can't apply purgable controls to something you can't write.
13439 */
13440 vm_map_unlock_read(map);
13441 return(KERN_PROTECTION_FAILURE);
13442 }
13443
13444 object = entry->object.vm_object;
13445 if (object == VM_OBJECT_NULL ||
13446 object->purgable == VM_PURGABLE_DENY) {
13447 /*
13448 * Object must already be present and be purgeable.
13449 */
13450 vm_map_unlock_read(map);
13451 return KERN_INVALID_ARGUMENT;
13452 }
13453
13454 vm_object_lock(object);
13455
13456 #if 00
13457 if (entry->offset != 0 ||
13458 entry->vme_end - entry->vme_start != object->vo_size) {
13459 /*
13460 * Can only apply purgable controls to the whole (existing)
13461 * object at once.
13462 */
13463 vm_map_unlock_read(map);
13464 vm_object_unlock(object);
13465 return KERN_INVALID_ARGUMENT;
13466 }
13467 #endif
13468
13469 assert(!entry->is_sub_map);
13470 assert(!entry->use_pmap); /* purgeable has its own accounting */
13471
13472 vm_map_unlock_read(map);
13473
13474 was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);
13475
13476 kr = vm_object_purgable_control(object, control, state);
13477
13478 if (was_nonvolatile &&
13479 object->purgable != VM_PURGABLE_NONVOLATILE &&
13480 map->pmap == kernel_pmap) {
13481 #if DEBUG
13482 object->vo_purgeable_volatilizer = kernel_task;
13483 #endif /* DEBUG */
13484 }
13485
13486 vm_object_unlock(object);
13487
13488 return kr;
13489 }
13490
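/*
 * A hedged sketch of vm_map_purgable_control(): mark the purgeable object
 * backing "addr" volatile, then read back its current state. The wrapper
 * is hypothetical and assumes "addr" maps an existing purgeable object.
 */
#if 0 /* illustrative only */
static kern_return_t
example_make_volatile(
	vm_map_t	map,
	vm_map_offset_t	addr,
	int		*state)		/* out: current purgeable state */
{
	kern_return_t	kr;

	*state = VM_PURGABLE_VOLATILE;
	kr = vm_map_purgable_control(map, addr, VM_PURGABLE_SET_STATE, state);
	if (kr != KERN_SUCCESS)
		return kr;
	return vm_map_purgable_control(map, addr, VM_PURGABLE_GET_STATE, state);
}
#endif
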
13491 kern_return_t
13492 vm_map_page_query_internal(
13493 vm_map_t target_map,
13494 vm_map_offset_t offset,
13495 int *disposition,
13496 int *ref_count)
13497 {
13498 kern_return_t kr;
13499 vm_page_info_basic_data_t info;
13500 mach_msg_type_number_t count;
13501
13502 count = VM_PAGE_INFO_BASIC_COUNT;
13503 kr = vm_map_page_info(target_map,
13504 offset,
13505 VM_PAGE_INFO_BASIC,
13506 (vm_page_info_t) &info,
13507 &count);
13508 if (kr == KERN_SUCCESS) {
13509 *disposition = info.disposition;
13510 *ref_count = info.ref_count;
13511 } else {
13512 *disposition = 0;
13513 *ref_count = 0;
13514 }
13515
13516 return kr;
13517 }
13518
13519 kern_return_t
13520 vm_map_page_info(
13521 vm_map_t map,
13522 vm_map_offset_t offset,
13523 vm_page_info_flavor_t flavor,
13524 vm_page_info_t info,
13525 mach_msg_type_number_t *count)
13526 {
13527 vm_map_entry_t map_entry;
13528 vm_object_t object;
13529 vm_page_t m;
13530 kern_return_t kr;
13531 kern_return_t retval = KERN_SUCCESS;
13532 boolean_t top_object;
13533 int disposition;
13534 int ref_count;
13535 vm_page_info_basic_t basic_info;
13536 int depth;
13537 vm_map_offset_t offset_in_page;
13538
13539 switch (flavor) {
13540 case VM_PAGE_INFO_BASIC:
13541 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
13542 /*
13543 * The "vm_page_info_basic_data" structure was not
13544 * properly padded, so allow the size to be off by
13545 * one to maintain backwards binary compatibility...
13546 */
13547 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1)
13548 return KERN_INVALID_ARGUMENT;
13549 }
13550 break;
13551 default:
13552 return KERN_INVALID_ARGUMENT;
13553 }
13554
13555 disposition = 0;
13556 ref_count = 0;
13557 top_object = TRUE;
13558 depth = 0;
13559
13560 retval = KERN_SUCCESS;
13561 offset_in_page = offset & PAGE_MASK;
13562 offset = vm_map_trunc_page(offset, PAGE_MASK);
13563
13564 vm_map_lock_read(map);
13565
13566 /*
13567 * First, find the map entry covering "offset", going down
13568 * submaps if necessary.
13569 */
13570 for (;;) {
13571 if (!vm_map_lookup_entry(map, offset, &map_entry)) {
13572 vm_map_unlock_read(map);
13573 return KERN_INVALID_ADDRESS;
13574 }
13575 /* compute offset from this map entry's start */
13576 offset -= map_entry->vme_start;
13577 /* compute offset into this map entry's object (or submap) */
13578 offset += map_entry->offset;
13579
13580 if (map_entry->is_sub_map) {
13581 vm_map_t sub_map;
13582
13583 sub_map = map_entry->object.sub_map;
13584 vm_map_lock_read(sub_map);
13585 vm_map_unlock_read(map);
13586
13587 map = sub_map;
13588
13589 ref_count = MAX(ref_count, map->ref_count);
13590 continue;
13591 }
13592 break;
13593 }
13594
13595 object = map_entry->object.vm_object;
13596 if (object == VM_OBJECT_NULL) {
13597 /* no object -> no page */
13598 vm_map_unlock_read(map);
13599 goto done;
13600 }
13601
13602 vm_object_lock(object);
13603 vm_map_unlock_read(map);
13604
13605 /*
13606 * Go down the VM object shadow chain until we find the page
13607 * we're looking for.
13608 */
13609 for (;;) {
13610 ref_count = MAX(ref_count, object->ref_count);
13611
13612 m = vm_page_lookup(object, offset);
13613
13614 if (m != VM_PAGE_NULL) {
13615 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
13616 break;
13617 } else {
13618 #if MACH_PAGEMAP
13619 if (object->existence_map) {
13620 if (vm_external_state_get(object->existence_map,
13621 offset) ==
13622 VM_EXTERNAL_STATE_EXISTS) {
13623 /*
13624 * this page has been paged out
13625 */
13626 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
13627 break;
13628 }
13629 } else
13630 #endif
13631 if (object->internal &&
13632 object->alive &&
13633 !object->terminating &&
13634 object->pager_ready) {
13635
13636 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
13637 if (VM_COMPRESSOR_PAGER_STATE_GET(
13638 object,
13639 offset)
13640 == VM_EXTERNAL_STATE_EXISTS) {
13641 /* the pager has that page */
13642 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
13643 break;
13644 }
13645 } else {
13646 memory_object_t pager;
13647
13648 vm_object_paging_begin(object);
13649 pager = object->pager;
13650 vm_object_unlock(object);
13651
13652 /*
13653 * Ask the default pager if
13654 * it has this page.
13655 */
13656 kr = memory_object_data_request(
13657 pager,
13658 offset + object->paging_offset,
13659 0, /* just poke the pager */
13660 VM_PROT_READ,
13661 NULL);
13662
13663 vm_object_lock(object);
13664 vm_object_paging_end(object);
13665
13666 if (kr == KERN_SUCCESS) {
13667 /* the default pager has it */
13668 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
13669 break;
13670 }
13671 }
13672 }
13673
13674 if (object->shadow != VM_OBJECT_NULL) {
13675 vm_object_t shadow;
13676
13677 offset += object->vo_shadow_offset;
13678 shadow = object->shadow;
13679
13680 vm_object_lock(shadow);
13681 vm_object_unlock(object);
13682
13683 object = shadow;
13684 top_object = FALSE;
13685 depth++;
13686 } else {
13687 // if (!object->internal)
13688 // break;
13689 // retval = KERN_FAILURE;
13690 // goto done_with_object;
13691 break;
13692 }
13693 }
13694 }
13695 /* The ref_count is not strictly accurate: it measures the number */
13696 /* of entities holding a ref on the object; they may not be mapping */
13697 /* the object, or may not be mapping the section holding the */
13698 /* target page, but it's still a ballpark number and, though an over- */
13699 /* count, it picks up the copy-on-write cases. */
13700
13701 /* We could also get a picture of page sharing from pmap_attributes */
13702 /* but this would undercount, as only faulted-in mappings would */
13703 /* show up. */
13704
13705 if (top_object == TRUE && object->shadow)
13706 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
13707
13708 if (! object->internal)
13709 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
13710
13711 if (m == VM_PAGE_NULL)
13712 goto done_with_object;
13713
13714 if (m->fictitious) {
13715 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
13716 goto done_with_object;
13717 }
13718 if (m->dirty || pmap_is_modified(m->phys_page))
13719 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
13720
13721 if (m->reference || pmap_is_referenced(m->phys_page))
13722 disposition |= VM_PAGE_QUERY_PAGE_REF;
13723
13724 if (m->speculative)
13725 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
13726
13727 if (m->cs_validated)
13728 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
13729 if (m->cs_tainted)
13730 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
13731
13732 done_with_object:
13733 vm_object_unlock(object);
13734 done:
13735
13736 switch (flavor) {
13737 case VM_PAGE_INFO_BASIC:
13738 basic_info = (vm_page_info_basic_t) info;
13739 basic_info->disposition = disposition;
13740 basic_info->ref_count = ref_count;
13741 basic_info->object_id = (vm_object_id_t) (uintptr_t)
13742 VM_KERNEL_ADDRPERM(object);
13743 basic_info->offset =
13744 (memory_object_offset_t) offset + offset_in_page;
13745 basic_info->depth = depth;
13746 break;
13747 }
13748
13749 return retval;
13750 }
13751
13752 /*
13753 * vm_map_msync
13754 *
13755 * Synchronises the memory range specified with its backing store
13756 * image by either flushing or cleaning the contents to the appropriate
13757 * memory manager, engaging in a memory object synchronize dialog with
13758 * the manager. The client doesn't return until the manager issues
13759 * m_o_s_completed message. MIG Magically converts user task parameter
13760 * to the task's address map.
13761 *
13762 * interpretation of sync_flags
13763 * VM_SYNC_INVALIDATE - discard pages, only return precious
13764 * pages to manager.
13765 *
13766 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
13767 * - discard pages, write dirty or precious
13768 * pages back to memory manager.
13769 *
13770 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
13771 * - write dirty or precious pages back to
13772 * the memory manager.
13773 *
13774 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
13775 * is a hole in the region, and we would
13776 * have returned KERN_SUCCESS, return
13777 * KERN_INVALID_ADDRESS instead.
13778 *
13779 * NOTE
13780 * The memory object attributes have not yet been implemented; this
13781 * function will have to deal with the invalidate attribute.
13782 *
13783 * RETURNS
13784 * KERN_INVALID_TASK Bad task parameter
13785 * KERN_INVALID_ARGUMENT both sync and async were specified.
13786 * KERN_SUCCESS The usual.
13787 * KERN_INVALID_ADDRESS There was a hole in the region.
13788 */
13789
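/*
 * The VM_BEHAVIOR_DONTNEED and VM_BEHAVIOR_FREE cases earlier in this file
 * are the in-file examples of the sync_flags combinations documented above.
 * A hedged sketch of both, with a hypothetical wrapper:
 */
#if 0 /* illustrative only */
static kern_return_t
example_msync_advice(
	vm_map_t		map,
	vm_map_address_t	addr,
	vm_map_size_t		len,
	boolean_t		discard)
{
	if (discard) {
		/* MADV_FREE-style: kill clean and dirty pages in the range */
		return vm_map_msync(map, addr, len,
				    VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
	}
	/* MADV_DONTNEED-style: deactivate; fail if the range has a hole */
	return vm_map_msync(map, addr, len,
			    VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
}
#endif
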
13790 kern_return_t
13791 vm_map_msync(
13792 vm_map_t map,
13793 vm_map_address_t address,
13794 vm_map_size_t size,
13795 vm_sync_t sync_flags)
13796 {
13797 msync_req_t msr;
13798 msync_req_t new_msr;
13799 queue_chain_t req_q; /* queue of requests for this msync */
13800 vm_map_entry_t entry;
13801 vm_map_size_t amount_left;
13802 vm_object_offset_t offset;
13803 boolean_t do_sync_req;
13804 boolean_t had_hole = FALSE;
13805 memory_object_t pager;
13806
13807 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
13808 (sync_flags & VM_SYNC_SYNCHRONOUS))
13809 return(KERN_INVALID_ARGUMENT);
13810
13811 /*
13812 * align address and size on page boundaries
13813 */
13814 size = (vm_map_round_page(address + size,
13815 VM_MAP_PAGE_MASK(map)) -
13816 vm_map_trunc_page(address,
13817 VM_MAP_PAGE_MASK(map)));
13818 address = vm_map_trunc_page(address,
13819 VM_MAP_PAGE_MASK(map));
13820
13821 if (map == VM_MAP_NULL)
13822 return(KERN_INVALID_TASK);
13823
13824 if (size == 0)
13825 return(KERN_SUCCESS);
13826
13827 queue_init(&req_q);
13828 amount_left = size;
13829
13830 while (amount_left > 0) {
13831 vm_object_size_t flush_size;
13832 vm_object_t object;
13833
13834 vm_map_lock(map);
13835 if (!vm_map_lookup_entry(map,
13836 vm_map_trunc_page(
13837 address,
13838 VM_MAP_PAGE_MASK(map)),
13839 &entry)) {
13840
13841 vm_map_size_t skip;
13842
13843 /*
13844 * hole in the address map.
13845 */
13846 had_hole = TRUE;
13847
13848 /*
13849 * Check for empty map.
13850 */
13851 if (entry == vm_map_to_entry(map) &&
13852 entry->vme_next == entry) {
13853 vm_map_unlock(map);
13854 break;
13855 }
13856 /*
13857 * Check that we don't wrap and that
13858 * we have at least one real map entry.
13859 */
13860 if ((map->hdr.nentries == 0) ||
13861 (entry->vme_next->vme_start < address)) {
13862 vm_map_unlock(map);
13863 break;
13864 }
13865 /*
13866 * Move up to the next entry if needed
13867 */
13868 skip = (entry->vme_next->vme_start - address);
13869 if (skip >= amount_left)
13870 amount_left = 0;
13871 else
13872 amount_left -= skip;
13873 address = entry->vme_next->vme_start;
13874 vm_map_unlock(map);
13875 continue;
13876 }
13877
13878 offset = address - entry->vme_start;
13879
13880 /*
13881 * do we have more to flush than is contained in this
13882 * entry?
13883 */
13884 if (amount_left + entry->vme_start + offset > entry->vme_end) {
13885 flush_size = entry->vme_end -
13886 (entry->vme_start + offset);
13887 } else {
13888 flush_size = amount_left;
13889 }
13890 amount_left -= flush_size;
13891 address += flush_size;
13892
13893 if (entry->is_sub_map == TRUE) {
13894 vm_map_t local_map;
13895 vm_map_offset_t local_offset;
13896
13897 local_map = entry->object.sub_map;
13898 local_offset = entry->offset;
13899 vm_map_unlock(map);
13900 if (vm_map_msync(
13901 local_map,
13902 local_offset,
13903 flush_size,
13904 sync_flags) == KERN_INVALID_ADDRESS) {
13905 had_hole = TRUE;
13906 }
13907 continue;
13908 }
13909 object = entry->object.vm_object;
13910
13911 /*
13912 * We can't sync this object if the object has not been
13913 * created yet
13914 */
13915 if (object == VM_OBJECT_NULL) {
13916 vm_map_unlock(map);
13917 continue;
13918 }
13919 offset += entry->offset;
13920
13921 vm_object_lock(object);
13922
13923 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
13924 int kill_pages = 0;
13925 boolean_t reusable_pages = FALSE;
13926
13927 if (sync_flags & VM_SYNC_KILLPAGES) {
13928 if (object->ref_count == 1 && !object->shadow)
13929 kill_pages = 1;
13930 else
13931 kill_pages = -1;
13932 }
13933 if (kill_pages != -1)
13934 vm_object_deactivate_pages(object, offset,
13935 (vm_object_size_t)flush_size, kill_pages, reusable_pages);
13936 vm_object_unlock(object);
13937 vm_map_unlock(map);
13938 continue;
13939 }
13940 /*
13941 * We can't sync this object if there isn't a pager.
13942 * Don't bother to sync internal objects, since there can't
13943 * be any "permanent" storage for these objects anyway.
13944 */
13945 if ((object->pager == MEMORY_OBJECT_NULL) ||
13946 (object->internal) || (object->private)) {
13947 vm_object_unlock(object);
13948 vm_map_unlock(map);
13949 continue;
13950 }
13951 /*
13952 * keep reference on the object until syncing is done
13953 */
13954 vm_object_reference_locked(object);
13955 vm_object_unlock(object);
13956
13957 vm_map_unlock(map);
13958
13959 do_sync_req = vm_object_sync(object,
13960 offset,
13961 flush_size,
13962 sync_flags & VM_SYNC_INVALIDATE,
13963 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
13964 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
13965 sync_flags & VM_SYNC_SYNCHRONOUS);
13966 /*
13967 * only send a memory_object_synchronize() if vm_object_sync() returned
13968 * pages or if the entry is writable (i.e. dirty pages may already have been sent back)
13969 */
13970 if (!do_sync_req) {
13971 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
13972 /*
13973 * clear out the clustering and read-ahead hints
13974 */
13975 vm_object_lock(object);
13976
13977 object->pages_created = 0;
13978 object->pages_used = 0;
13979 object->sequential = 0;
13980 object->last_alloc = 0;
13981
13982 vm_object_unlock(object);
13983 }
13984 vm_object_deallocate(object);
13985 continue;
13986 }
13987 msync_req_alloc(new_msr);
13988
13989 vm_object_lock(object);
13990 offset += object->paging_offset;
13991
13992 new_msr->offset = offset;
13993 new_msr->length = flush_size;
13994 new_msr->object = object;
13995 new_msr->flag = VM_MSYNC_SYNCHRONIZING;
13996 re_iterate:
13997
13998 /*
13999 * We can't sync this object if there isn't a pager. The
14000 * pager can disappear anytime we're not holding the object
14001 * lock. So this has to be checked anytime we goto re_iterate.
14002 */
14003
14004 pager = object->pager;
14005
14006 if (pager == MEMORY_OBJECT_NULL) {
14007 vm_object_unlock(object);
14008 vm_object_deallocate(object);
14009 msync_req_free(new_msr);
14010 new_msr = NULL;
14011 continue;
14012 }
14013
14014 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
14015 /*
14016 * need to check for overlapping entry, if found, wait
14017 * on overlapping msr to be done, then reiterate
14018 */
14019 msr_lock(msr);
14020 if (msr->flag == VM_MSYNC_SYNCHRONIZING &&
14021 ((offset >= msr->offset &&
14022 offset < (msr->offset + msr->length)) ||
14023 (msr->offset >= offset &&
14024 msr->offset < (offset + flush_size))))
14025 {
14026 assert_wait((event_t) msr,THREAD_INTERRUPTIBLE);
14027 msr_unlock(msr);
14028 vm_object_unlock(object);
14029 thread_block(THREAD_CONTINUE_NULL);
14030 vm_object_lock(object);
14031 goto re_iterate;
14032 }
14033 msr_unlock(msr);
14034 }/* queue_iterate */
14035
14036 queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q);
14037
14038 vm_object_paging_begin(object);
14039 vm_object_unlock(object);
14040
14041 queue_enter(&req_q, new_msr, msync_req_t, req_q);
14042
14043 (void) memory_object_synchronize(
14044 pager,
14045 offset,
14046 flush_size,
14047 sync_flags & ~VM_SYNC_CONTIGUOUS);
14048
14049 vm_object_lock(object);
14050 vm_object_paging_end(object);
14051 vm_object_unlock(object);
14052 }/* while */
14053
14054 /*
14055 * wait for memory_object_synchronize_completed messages from the pager(s)
14056 */
14057
14058 while (!queue_empty(&req_q)) {
14059 msr = (msync_req_t)queue_first(&req_q);
14060 msr_lock(msr);
14061 while(msr->flag != VM_MSYNC_DONE) {
14062 assert_wait((event_t) msr, THREAD_INTERRUPTIBLE);
14063 msr_unlock(msr);
14064 thread_block(THREAD_CONTINUE_NULL);
14065 msr_lock(msr);
14066 }/* while */
14067 queue_remove(&req_q, msr, msync_req_t, req_q);
14068 msr_unlock(msr);
14069 vm_object_deallocate(msr->object);
14070 msync_req_free(msr);
14071 }/* while */
14072
14073 /* for proper msync() behaviour */
14074 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
14075 return(KERN_INVALID_ADDRESS);
14076
14077 return(KERN_SUCCESS);
14078 }/* vm_msync */
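/*
 * Example (minimal sketch): one way a caller could drive vm_map_msync() for an
 * msync()-style request.  The MS_* translation and "example_msync" are
 * illustrative assumptions, not the actual BSD msync() path.
 */
#if 0 /* illustrative sketch only; not built */
static kern_return_t
example_msync(vm_map_t map, mach_vm_offset_t addr, mach_vm_size_t len, int ms_flags)
{
    vm_sync_t sync_flags = 0;

    if (ms_flags & MS_SYNC)
        sync_flags |= VM_SYNC_SYNCHRONOUS;
    else
        sync_flags |= VM_SYNC_ASYNCHRONOUS;
    if (ms_flags & MS_INVALIDATE)
        sync_flags |= VM_SYNC_INVALIDATE;
    /* ask for KERN_INVALID_ADDRESS if the range has holes */
    sync_flags |= VM_SYNC_CONTIGUOUS;

    return vm_map_msync(map, (vm_map_address_t)addr, len, sync_flags);
}
#endif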
14079
14080 /*
14081 * Routine: convert_port_entry_to_map
14082 * Purpose:
14083 * Convert from a port specifying an entry or a task
14084 * to a map. Doesn't consume the port ref; produces a map ref,
14085 * which may be null. Unlike convert_port_to_map, the
14086 * port may be either task backed or named-entry backed.
14087 * Conditions:
14088 * Nothing locked.
14089 */
14090
14091
14092 vm_map_t
14093 convert_port_entry_to_map(
14094 ipc_port_t port)
14095 {
14096 vm_map_t map;
14097 vm_named_entry_t named_entry;
14098 uint32_t try_failed_count = 0;
14099
14100 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
14101 while(TRUE) {
14102 ip_lock(port);
14103 if(ip_active(port) && (ip_kotype(port)
14104 == IKOT_NAMED_ENTRY)) {
14105 named_entry =
14106 (vm_named_entry_t)port->ip_kobject;
14107 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
14108 ip_unlock(port);
14109
14110 try_failed_count++;
14111 mutex_pause(try_failed_count);
14112 continue;
14113 }
14114 named_entry->ref_count++;
14115 lck_mtx_unlock(&(named_entry)->Lock);
14116 ip_unlock(port);
14117 if ((named_entry->is_sub_map) &&
14118 (named_entry->protection
14119 & VM_PROT_WRITE)) {
14120 map = named_entry->backing.map;
14121 } else {
14122 mach_destroy_memory_entry(port);
14123 return VM_MAP_NULL;
14124 }
14125 vm_map_reference_swap(map);
14126 mach_destroy_memory_entry(port);
14127 break;
14128 }
14129 else
14130 return VM_MAP_NULL;
14131 }
14132 }
14133 else
14134 map = convert_port_to_map(port);
14135
14136 return map;
14137 }
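/*
 * Example (minimal sketch): the map reference produced above must be dropped
 * by the caller with vm_map_deallocate(); the port reference is untouched.
 * "example_use_map_port" is a hypothetical caller, not an existing routine.
 */
#if 0 /* illustrative sketch only; not built */
static kern_return_t
example_use_map_port(ipc_port_t port)
{
    vm_map_t map;

    map = convert_port_entry_to_map(port);
    if (map == VM_MAP_NULL)
        return KERN_INVALID_ARGUMENT;

    /* ... operate on "map" ... */

    vm_map_deallocate(map); /* release the reference produced above */
    return KERN_SUCCESS;
}
#endif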
14138
14139 /*
14140 * Routine: convert_port_entry_to_object
14141 * Purpose:
14142 * Convert from a port specifying a named entry to an
14143 * object. Doesn't consume the port ref; produces an object ref,
14144 * which may be null.
14145 * Conditions:
14146 * Nothing locked.
14147 */
14148
14149
14150 vm_object_t
14151 convert_port_entry_to_object(
14152 ipc_port_t port)
14153 {
14154 vm_object_t object = VM_OBJECT_NULL;
14155 vm_named_entry_t named_entry;
14156 uint32_t try_failed_count = 0;
14157
14158 if (IP_VALID(port) &&
14159 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
14160 try_again:
14161 ip_lock(port);
14162 if (ip_active(port) &&
14163 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
14164 named_entry = (vm_named_entry_t)port->ip_kobject;
14165 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
14166 ip_unlock(port);
14167 try_failed_count++;
14168 mutex_pause(try_failed_count);
14169 goto try_again;
14170 }
14171 named_entry->ref_count++;
14172 lck_mtx_unlock(&(named_entry)->Lock);
14173 ip_unlock(port);
14174 if (!(named_entry->is_sub_map) &&
14175 !(named_entry->is_pager) &&
14176 !(named_entry->is_copy) &&
14177 (named_entry->protection & VM_PROT_WRITE)) {
14178 object = named_entry->backing.object;
14179 vm_object_reference(object);
14180 }
14181 mach_destroy_memory_entry(port);
14182 }
14183 }
14184
14185 return object;
14186 }
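/*
 * Example (minimal sketch): same pattern for the object variant; the object
 * reference, when one is produced, must be released with vm_object_deallocate().
 * "example_use_entry_object" is a hypothetical caller.
 */
#if 0 /* illustrative sketch only; not built */
static void
example_use_entry_object(ipc_port_t port)
{
    vm_object_t object;

    object = convert_port_entry_to_object(port);
    if (object == VM_OBJECT_NULL)
        return; /* not a writable, non-submap, non-pager named entry */

    /* ... operate on "object" ... */

    vm_object_deallocate(object);
}
#endif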
14187
14188 /*
14189 * Export routines to other components for the things we access locally through
14190 * macros.
14191 */
14192 #undef current_map
14193 vm_map_t
14194 current_map(void)
14195 {
14196 return (current_map_fast());
14197 }
14198
14199 /*
14200 * vm_map_reference:
14201 *
14202 * Most code internal to the osfmk will go through a
14203 * macro defining this. This is always here for the
14204 * use of other kernel components.
14205 */
14206 #undef vm_map_reference
14207 void
14208 vm_map_reference(
14209 register vm_map_t map)
14210 {
14211 if (map == VM_MAP_NULL)
14212 return;
14213
14214 lck_mtx_lock(&map->s_lock);
14215 #if TASK_SWAPPER
14216 assert(map->res_count > 0);
14217 assert(map->ref_count >= map->res_count);
14218 map->res_count++;
14219 #endif
14220 map->ref_count++;
14221 lck_mtx_unlock(&map->s_lock);
14222 }
14223
14224 /*
14225 * vm_map_deallocate:
14226 *
14227 * Removes a reference from the specified map,
14228 * destroying it if no references remain.
14229 * The map should not be locked.
14230 */
14231 void
14232 vm_map_deallocate(
14233 register vm_map_t map)
14234 {
14235 unsigned int ref;
14236
14237 if (map == VM_MAP_NULL)
14238 return;
14239
14240 lck_mtx_lock(&map->s_lock);
14241 ref = --map->ref_count;
14242 if (ref > 0) {
14243 vm_map_res_deallocate(map);
14244 lck_mtx_unlock(&map->s_lock);
14245 return;
14246 }
14247 assert(map->ref_count == 0);
14248 lck_mtx_unlock(&map->s_lock);
14249
14250 #if TASK_SWAPPER
14251 /*
14252 * The map residence count isn't decremented here because
14253 * the vm_map_delete below will traverse the entire map,
14254 * deleting entries, and the residence counts on objects
14255 * and sharing maps will go away then.
14256 */
14257 #endif
14258
14259 vm_map_destroy(map, VM_MAP_NO_FLAGS);
14260 }
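/*
 * Example (minimal sketch): vm_map_reference() and vm_map_deallocate() must be
 * paired; a component that caches a map pointer holds a reference for as long
 * as it keeps the pointer.  The cache below is hypothetical.
 */
#if 0 /* illustrative sketch only; not built */
static vm_map_t example_cached_map = VM_MAP_NULL;

static void
example_cache_map(vm_map_t map)
{
    vm_map_reference(map);      /* +1 ref while cached */
    example_cached_map = map;
}

static void
example_uncache_map(void)
{
    vm_map_t map = example_cached_map;

    example_cached_map = VM_MAP_NULL;
    if (map != VM_MAP_NULL)
        vm_map_deallocate(map); /* may tear the map down if this was the last ref */
}
#endif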
14261
14262
14263 void
14264 vm_map_disable_NX(vm_map_t map)
14265 {
14266 if (map == NULL)
14267 return;
14268 if (map->pmap == NULL)
14269 return;
14270
14271 pmap_disable_NX(map->pmap);
14272 }
14273
14274 void
14275 vm_map_disallow_data_exec(vm_map_t map)
14276 {
14277 if (map == NULL)
14278 return;
14279
14280 map->map_disallow_data_exec = TRUE;
14281 }
14282
14283 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
14284 * more descriptive.
14285 */
14286 void
14287 vm_map_set_32bit(vm_map_t map)
14288 {
14289 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
14290 }
14291
14292
14293 void
14294 vm_map_set_64bit(vm_map_t map)
14295 {
14296 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
14297 }
14298
14299 vm_map_offset_t
14300 vm_compute_max_offset(unsigned is64)
14301 {
14302 return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
14303 }
14304
14305 uint64_t
14306 vm_map_get_max_aslr_slide_pages(vm_map_t map)
14307 {
14308 return (1 << (vm_map_is_64bit(map) ? 16 : 8));
14309 }
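/*
 * Example (minimal sketch): turning the page count above into a byte-sized,
 * page-aligned ASLR slide.  The entropy source and the modulo policy are
 * assumptions for illustration; the real policy lives in the Mach-O load path.
 */
#if 0 /* illustrative sketch only; not built */
static vm_map_offset_t
example_pick_aslr_slide(vm_map_t map, uint64_t entropy)
{
    uint64_t slide_pages = vm_map_get_max_aslr_slide_pages(map);

    /* a page-aligned slide in [0, slide_pages) pages */
    return (vm_map_offset_t)((entropy % slide_pages) << VM_MAP_PAGE_SHIFT(map));
}
#endif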
14310
14311 boolean_t
14312 vm_map_is_64bit(
14313 vm_map_t map)
14314 {
14315 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
14316 }
14317
14318 boolean_t
14319 vm_map_has_hard_pagezero(
14320 vm_map_t map,
14321 vm_map_offset_t pagezero_size)
14322 {
14323 /*
14324 * XXX FBDP
14325 * We should lock the VM map (for read) here but we can get away
14326 * with it for now because there can't really be any race condition:
14327 * the VM map's min_offset is changed only when the VM map is created
14328 * and when the zero page is established (when the binary gets loaded),
14329 * and this routine gets called only when the task terminates and the
14330 * VM map is being torn down, and when a new map is created via
14331 * load_machfile()/execve().
14332 */
14333 return (map->min_offset >= pagezero_size);
14334 }
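/*
 * Example (minimal sketch): checking whether a 64-bit map still has the
 * conventional 4GB "page zero" reservation.  The 4GB constant is an
 * assumption here; this file does not define it.
 */
#if 0 /* illustrative sketch only; not built */
static boolean_t
example_has_4gb_pagezero(vm_map_t map)
{
    return vm_map_has_hard_pagezero(map, (vm_map_offset_t)0x100000000ULL);
}
#endif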
14335
14336 /*
14337 * Raise a VM map's maximum offset.
14338 */
14339 kern_return_t
14340 vm_map_raise_max_offset(
14341 vm_map_t map,
14342 vm_map_offset_t new_max_offset)
14343 {
14344 kern_return_t ret;
14345
14346 vm_map_lock(map);
14347 ret = KERN_INVALID_ADDRESS;
14348
14349 if (new_max_offset >= map->max_offset) {
14350 if (!vm_map_is_64bit(map)) {
14351 if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
14352 map->max_offset = new_max_offset;
14353 ret = KERN_SUCCESS;
14354 }
14355 } else {
14356 if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
14357 map->max_offset = new_max_offset;
14358 ret = KERN_SUCCESS;
14359 }
14360 }
14361 }
14362
14363 vm_map_unlock(map);
14364 return ret;
14365 }
14366
14367
14368 /*
14369 * Raise a VM map's minimum offset.
14370 * To strictly enforce "page zero" reservation.
14371 */
14372 kern_return_t
14373 vm_map_raise_min_offset(
14374 vm_map_t map,
14375 vm_map_offset_t new_min_offset)
14376 {
14377 vm_map_entry_t first_entry;
14378
14379 new_min_offset = vm_map_round_page(new_min_offset,
14380 VM_MAP_PAGE_MASK(map));
14381
14382 vm_map_lock(map);
14383
14384 if (new_min_offset < map->min_offset) {
14385 /*
14386 * Can't move min_offset backwards, as that would expose
14387 * a part of the address space that was previously, and for
14388 * possibly good reasons, inaccessible.
14389 */
14390 vm_map_unlock(map);
14391 return KERN_INVALID_ADDRESS;
14392 }
14393
14394 first_entry = vm_map_first_entry(map);
14395 if (first_entry != vm_map_to_entry(map) &&
14396 first_entry->vme_start < new_min_offset) {
14397 /*
14398 * Some memory was already allocated below the new
14399 * minimum offset. It's too late to change it now...
14400 */
14401 vm_map_unlock(map);
14402 return KERN_NO_SPACE;
14403 }
14404
14405 map->min_offset = new_min_offset;
14406
14407 vm_map_unlock(map);
14408
14409 return KERN_SUCCESS;
14410 }
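/*
 * Example (minimal sketch): reserving a hard page zero on a freshly created
 * map, before anything has been mapped into it.  Using PAGE_SIZE as the
 * reservation size is an illustrative assumption; the loader picks the real value.
 */
#if 0 /* illustrative sketch only; not built */
static void
example_reserve_pagezero(vm_map_t new_map)
{
    kern_return_t kr;

    kr = vm_map_raise_min_offset(new_map, (vm_map_offset_t)PAGE_SIZE);
    /* can only fail if something is already mapped below the new minimum */
    assert(kr == KERN_SUCCESS);
}
#endif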
14411
14412 /*
14413 * Set the limit on the maximum amount of user wired memory allowed for this map.
14414 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
14415 * the kernel. The limit is checked on the Mach VM side, so we keep a copy here
14416 * to avoid having to reach over to the BSD data structures.
14417 */
14418
14419 void
14420 vm_map_set_user_wire_limit(vm_map_t map,
14421 vm_size_t limit)
14422 {
14423 map->user_wire_limit = limit;
14424 }
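/*
 * Example (minimal sketch): mirroring an RLIMIT_MEMLOCK-style value into the
 * map, as the BSD side would when the resource limit changes.  The rlimit
 * plumbing is assumed for illustration.
 */
#if 0 /* illustrative sketch only; not built */
static void
example_sync_memlock_limit(vm_map_t map, uint64_t rlim_cur)
{
    vm_map_set_user_wire_limit(map, (vm_size_t)rlim_cur);
}
#endif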
14425
14426
14427 void vm_map_switch_protect(vm_map_t map,
14428 boolean_t val)
14429 {
14430 vm_map_lock(map);
14431 map->switch_protect=val;
14432 vm_map_unlock(map);
14433 }
14434
14435 /*
14436 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
14437 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
14438 * bump both counters.
14439 */
14440 void
14441 vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
14442 {
14443 pmap_t pmap = vm_map_pmap(map);
14444
14445 ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
14446 ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
14447 }
14448
14449 void
14450 vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
14451 {
14452 pmap_t pmap = vm_map_pmap(map);
14453
14454 ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
14455 ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
14456 }
14457
14458 /* Add (generate) code signature for memory range */
14459 #if CONFIG_DYNAMIC_CODE_SIGNING
14460 kern_return_t vm_map_sign(vm_map_t map,
14461 vm_map_offset_t start,
14462 vm_map_offset_t end)
14463 {
14464 vm_map_entry_t entry;
14465 vm_page_t m;
14466 vm_object_t object;
14467
14468 /*
14469 * Vet all the input parameters and current type and state of the
14470 * underlying object. Return with an error if anything is amiss.
14471 */
14472 if (map == VM_MAP_NULL)
14473 return(KERN_INVALID_ARGUMENT);
14474
14475 vm_map_lock_read(map);
14476
14477 if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
14478 /*
14479 * Must pass a valid non-submap address.
14480 */
14481 vm_map_unlock_read(map);
14482 return(KERN_INVALID_ADDRESS);
14483 }
14484
14485 if((entry->vme_start > start) || (entry->vme_end < end)) {
14486 /*
14487 * Map entry doesn't cover the requested range. Not handling
14488 * this situation currently.
14489 */
14490 vm_map_unlock_read(map);
14491 return(KERN_INVALID_ARGUMENT);
14492 }
14493
14494 object = entry->object.vm_object;
14495 if (object == VM_OBJECT_NULL) {
14496 /*
14497 * Object must already be present or we can't sign.
14498 */
14499 vm_map_unlock_read(map);
14500 return KERN_INVALID_ARGUMENT;
14501 }
14502
14503 vm_object_lock(object);
14504 vm_map_unlock_read(map);
14505
14506 while(start < end) {
14507 uint32_t refmod;
14508
14509 m = vm_page_lookup(object, start - entry->vme_start + entry->offset );
14510 if (m==VM_PAGE_NULL) {
14511 /* should we try to fault a page here? we can probably
14512 * demand it exists and is locked for this request */
14513 vm_object_unlock(object);
14514 return KERN_FAILURE;
14515 }
14516 /* deal with special page status */
14517 if (m->busy ||
14518 (m->unusual && (m->error || m->restart || m->private || m->absent))) {
14519 vm_object_unlock(object);
14520 return KERN_FAILURE;
14521 }
14522
14523 /* Page is OK... now "validate" it */
14524 /* This is the place where we'll call out to create a code
14525 * directory, later */
14526 m->cs_validated = TRUE;
14527
14528 /* The page is now "clean" for codesigning purposes. That means
14529 * we don't consider it as modified (wpmapped) anymore. But
14530 * we'll disconnect the page so we note any future modification
14531 * attempts. */
14532 m->wpmapped = FALSE;
14533 refmod = pmap_disconnect(m->phys_page);
14534
14535 /* Pull the dirty status from the pmap, since we cleared the
14536 * wpmapped bit */
14537 if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
14538 SET_PAGE_DIRTY(m, FALSE);
14539 }
14540
14541 /* On to the next page */
14542 start += PAGE_SIZE;
14543 }
14544 vm_object_unlock(object);
14545
14546 return KERN_SUCCESS;
14547 }
14548 #endif
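/*
 * Example (minimal sketch): vm_map_sign() expects a range that is fully
 * covered by a single, already-populated map entry; the caller aligns the
 * range to the map's page size first.  "example_sign_region" is hypothetical
 * and only meaningful when CONFIG_DYNAMIC_CODE_SIGNING is configured.
 */
#if 0 /* illustrative sketch only; not built */
static kern_return_t
example_sign_region(vm_map_t map, vm_map_offset_t start, vm_map_size_t size)
{
    vm_map_offset_t s = vm_map_trunc_page(start, VM_MAP_PAGE_MASK(map));
    vm_map_offset_t e = vm_map_round_page(start + size, VM_MAP_PAGE_MASK(map));

    return vm_map_sign(map, s, e);
}
#endif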
14549
14550 kern_return_t vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
14551 {
14552 vm_map_entry_t entry = VM_MAP_ENTRY_NULL;
14553 vm_map_entry_t next_entry;
14554 kern_return_t kr = KERN_SUCCESS;
14555 vm_map_t zap_map;
14556
14557 vm_map_lock(map);
14558
14559 /*
14560 * We use a "zap_map" to avoid having to unlock
14561 * the "map" in vm_map_delete().
14562 */
14563 zap_map = vm_map_create(PMAP_NULL,
14564 map->min_offset, map->max_offset,
14565 map->hdr.entries_pageable);
14566 
14567 if (zap_map == VM_MAP_NULL) {
14568 vm_map_unlock(map); /* don't return with the map still locked */
14569 return KERN_RESOURCE_SHORTAGE;
14570 }
14571
14572 vm_map_set_page_shift(zap_map,
14573 VM_MAP_PAGE_SHIFT(map));
14574
14575 for (entry = vm_map_first_entry(map);
14576 entry != vm_map_to_entry(map);
14577 entry = next_entry) {
14578 next_entry = entry->vme_next;
14579
14580 if (entry->object.vm_object && !entry->is_sub_map && (entry->object.vm_object->internal == TRUE)
14581 && (entry->object.vm_object->ref_count == 1)) {
14582
14583 *reclaimed_resident += entry->object.vm_object->resident_page_count;
14584 *reclaimed_compressed += vm_compressor_pager_get_count(entry->object.vm_object->pager);
14585
14586 (void)vm_map_delete(map,
14587 entry->vme_start,
14588 entry->vme_end,
14589 VM_MAP_REMOVE_SAVE_ENTRIES,
14590 zap_map);
14591 }
14592 }
14593
14594 vm_map_unlock(map);
14595
14596 /*
14597 * Get rid of the "zap_maps" and all the map entries that
14598 * they may still contain.
14599 */
14600 if (zap_map != VM_MAP_NULL) {
14601 vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
14602 zap_map = VM_MAP_NULL;
14603 }
14604
14605 return kr;
14606 }
14607
14608 #if CONFIG_FREEZE
14609
14610 kern_return_t vm_map_freeze_walk(
14611 vm_map_t map,
14612 unsigned int *purgeable_count,
14613 unsigned int *wired_count,
14614 unsigned int *clean_count,
14615 unsigned int *dirty_count,
14616 unsigned int dirty_budget,
14617 boolean_t *has_shared)
14618 {
14619 vm_map_entry_t entry;
14620
14621 vm_map_lock_read(map);
14622
14623 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
14624 *has_shared = FALSE;
14625
14626 for (entry = vm_map_first_entry(map);
14627 entry != vm_map_to_entry(map);
14628 entry = entry->vme_next) {
14629 unsigned int purgeable, clean, dirty, wired;
14630 boolean_t shared;
14631
14632 if ((entry->object.vm_object == 0) ||
14633 (entry->is_sub_map) ||
14634 (entry->object.vm_object->phys_contiguous)) {
14635 continue;
14636 }
14637
14638 default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared, entry->object.vm_object, NULL);
14639
14640 *purgeable_count += purgeable;
14641 *wired_count += wired;
14642 *clean_count += clean;
14643 *dirty_count += dirty;
14644
14645 if (shared) {
14646 *has_shared = TRUE;
14647 }
14648
14649 /* Adjust pageout budget and finish up if reached */
14650 if (dirty_budget) {
14651 dirty_budget -= dirty;
14652 if (dirty_budget == 0) {
14653 break;
14654 }
14655 }
14656 }
14657
14658 vm_map_unlock_read(map);
14659
14660 return KERN_SUCCESS;
14661 }
14662
14663 kern_return_t vm_map_freeze(
14664 vm_map_t map,
14665 unsigned int *purgeable_count,
14666 unsigned int *wired_count,
14667 unsigned int *clean_count,
14668 unsigned int *dirty_count,
14669 unsigned int dirty_budget,
14670 boolean_t *has_shared)
14671 {
14672 vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
14673 kern_return_t kr = KERN_SUCCESS;
14674 boolean_t default_freezer_active = TRUE;
14675
14676 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
14677 *has_shared = FALSE;
14678
14679 /*
14680 * We need the exclusive lock here so that we can
14681 * block any page faults or lookups while we are
14682 * in the middle of freezing this vm map.
14683 */
14684 vm_map_lock(map);
14685
14686 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
14687 default_freezer_active = FALSE;
14688 }
14689
14690 if (default_freezer_active) {
14691 if (map->default_freezer_handle == NULL) {
14692 map->default_freezer_handle = default_freezer_handle_allocate();
14693 }
14694
14695 if ((kr = default_freezer_handle_init(map->default_freezer_handle)) != KERN_SUCCESS) {
14696 /*
14697 * Can happen if default_freezer_handle passed in is NULL
14698 * Or, a table has already been allocated and associated
14699 * with this handle, i.e. the map is already frozen.
14700 */
14701 goto done;
14702 }
14703 }
14704
14705 for (entry2 = vm_map_first_entry(map);
14706 entry2 != vm_map_to_entry(map);
14707 entry2 = entry2->vme_next) {
14708
14709 vm_object_t src_object = entry2->object.vm_object;
14710
14711 if (entry2->object.vm_object && !entry2->is_sub_map && !entry2->object.vm_object->phys_contiguous) {
14712 /* If eligible, scan the entry, moving eligible pages over to our parent object */
14713 if (default_freezer_active) {
14714 unsigned int purgeable, clean, dirty, wired;
14715 boolean_t shared;
14716
14717 default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared,
14718 src_object, map->default_freezer_handle);
14719
14720 *purgeable_count += purgeable;
14721 *wired_count += wired;
14722 *clean_count += clean;
14723 *dirty_count += dirty;
14724
14725 /* Adjust pageout budget and finish up if reached */
14726 if (dirty_budget) {
14727 dirty_budget -= dirty;
14728 if (dirty_budget == 0) {
14729 break;
14730 }
14731 }
14732
14733 if (shared) {
14734 *has_shared = TRUE;
14735 }
14736 } else {
14737 /*
14738 * To the compressor.
14739 */
14740 if (entry2->object.vm_object->internal == TRUE) {
14741 vm_object_pageout(entry2->object.vm_object);
14742 }
14743 }
14744 }
14745 }
14746
14747 if (default_freezer_active) {
14748 /* Finally, throw out the pages to swap */
14749 default_freezer_pageout(map->default_freezer_handle);
14750 }
14751
14752 done:
14753 vm_map_unlock(map);
14754
14755 return kr;
14756 }
14757
14758 kern_return_t
14759 vm_map_thaw(
14760 vm_map_t map)
14761 {
14762 kern_return_t kr = KERN_SUCCESS;
14763
14764 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
14765 /*
14766 * We thaw on demand in the presence of the compressed pager.
14767 */
14768 return kr;
14769 }
14770
14771 vm_map_lock(map);
14772
14773 if (map->default_freezer_handle == NULL) {
14774 /*
14775 * This map is not in a frozen state.
14776 */
14777 kr = KERN_FAILURE;
14778 goto out;
14779 }
14780
14781 kr = default_freezer_unpack(map->default_freezer_handle);
14782 out:
14783 vm_map_unlock(map);
14784
14785 return kr;
14786 }
14787 #endif
14788
14789 /*
14790 * vm_map_entry_should_cow_for_true_share:
14791 *
14792 * Determines if the map entry should be clipped and setup for copy-on-write
14793 * to avoid applying "true_share" to a large VM object when only a subset is
14794 * targeted.
14795 *
14796 * For now, we target only the map entries created for the Objective C
14797 * Garbage Collector, which initially have the following properties:
14798 * - alias == VM_MEMORY_MALLOC
14799 * - wired_count == 0
14800 * - !needs_copy
14801 * and a VM object with:
14802 * - internal
14803 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
14804 * - !true_share
14805 * - vo_size == ANON_CHUNK_SIZE
14806 */
14807 boolean_t
14808 vm_map_entry_should_cow_for_true_share(
14809 vm_map_entry_t entry)
14810 {
14811 vm_object_t object;
14812
14813 if (entry->is_sub_map) {
14814 /* entry does not point at a VM object */
14815 return FALSE;
14816 }
14817
14818 if (entry->needs_copy) {
14819 /* already set for copy_on_write: done! */
14820 return FALSE;
14821 }
14822
14823 if (entry->alias != VM_MEMORY_MALLOC &&
14824 entry->alias != VM_MEMORY_MALLOC_SMALL) {
14825 /* not a malloc heap or Obj-C Garbage Collector heap */
14826 return FALSE;
14827 }
14828
14829 if (entry->wired_count) {
14830 /* wired: can't change the map entry... */
14831 vm_counters.should_cow_but_wired++;
14832 return FALSE;
14833 }
14834
14835 object = entry->object.vm_object;
14836
14837 if (object == VM_OBJECT_NULL) {
14838 /* no object yet... */
14839 return FALSE;
14840 }
14841
14842 if (!object->internal) {
14843 /* not an internal object */
14844 return FALSE;
14845 }
14846
14847 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
14848 /* not the default copy strategy */
14849 return FALSE;
14850 }
14851
14852 if (object->true_share) {
14853 /* already true_share: too late to avoid it */
14854 return FALSE;
14855 }
14856
14857 if (entry->alias == VM_MEMORY_MALLOC &&
14858 object->vo_size != ANON_CHUNK_SIZE) {
14859 /* ... not an object created for the ObjC Garbage Collector */
14860 return FALSE;
14861 }
14862
14863 if (entry->alias == VM_MEMORY_MALLOC_SMALL &&
14864 object->vo_size != 2048 * 4096) {
14865 /* ... not a "MALLOC_SMALL" heap */
14866 return FALSE;
14867 }
14868
14869 /*
14870 * All the criteria match: we have a large object being targeted for "true_share".
14871 * To limit the adverse side-effects linked with "true_share", tell the caller to
14872 * try and avoid setting up the entire object for "true_share" by clipping the
14873 * targeted range and setting it up for copy-on-write.
14874 */
14875 return TRUE;
14876 }
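/*
 * Example (minimal sketch): how a caller typically acts on a TRUE result:
 * clip the entry down to the range of interest and mark it copy-on-write so
 * that only that range, not the whole large object, becomes "true_share".
 * This mirrors the callers in this file but omits their locking; the map
 * must be locked for write around the clipping.
 */
#if 0 /* illustrative sketch only; not built */
static void
example_limit_true_share(vm_map_t map, vm_map_entry_t entry,
                         vm_map_offset_t start, vm_map_offset_t end)
{
    if (vm_map_entry_should_cow_for_true_share(entry)) {
        vm_map_clip_start(map, entry,
            vm_map_trunc_page(start, VM_MAP_PAGE_MASK(map)));
        vm_map_clip_end(map, entry,
            vm_map_round_page(end, VM_MAP_PAGE_MASK(map)));
        entry->needs_copy = TRUE;   /* force copy-on-write on the clipped range */
    }
}
#endif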
14877
14878 vm_map_offset_t
14879 vm_map_round_page_mask(
14880 vm_map_offset_t offset,
14881 vm_map_offset_t mask)
14882 {
14883 return VM_MAP_ROUND_PAGE(offset, mask);
14884 }
14885
14886 vm_map_offset_t
14887 vm_map_trunc_page_mask(
14888 vm_map_offset_t offset,
14889 vm_map_offset_t mask)
14890 {
14891 return VM_MAP_TRUNC_PAGE(offset, mask);
14892 }
14893
14894 int
14895 vm_map_page_shift(
14896 vm_map_t map)
14897 {
14898 return VM_MAP_PAGE_SHIFT(map);
14899 }
14900
14901 int
14902 vm_map_page_size(
14903 vm_map_t map)
14904 {
14905 return VM_MAP_PAGE_SIZE(map);
14906 }
14907
14908 int
14909 vm_map_page_mask(
14910 vm_map_t map)
14911 {
14912 return VM_MAP_PAGE_MASK(map);
14913 }
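/*
 * Example (worked arithmetic): for a 16KB-page map (page shift 14,
 * page mask 0x3fff):
 *
 *    vm_map_trunc_page_mask(0x6003, 0x3fff) == 0x4000
 *    vm_map_round_page_mask(0x6003, 0x3fff) == 0x8000
 *    vm_map_page_size(map)                  == 16384
 */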
14914
14915 kern_return_t
14916 vm_map_set_page_shift(
14917 vm_map_t map,
14918 int pageshift)
14919 {
14920 if (map->hdr.nentries != 0) {
14921 /* too late to change page size */
14922 return KERN_FAILURE;
14923 }
14924
14925 map->hdr.page_shift = pageshift;
14926
14927 return KERN_SUCCESS;
14928 }
14929
14930 int
14931 vm_map_purge(
14932 vm_map_t map)
14933 {
14934 int num_object_purged;
14935 vm_map_entry_t entry;
14936 vm_map_offset_t next_address;
14937 vm_object_t object;
14938 int state;
14939 kern_return_t kr;
14940
14941 num_object_purged = 0;
14942
14943 vm_map_lock_read(map);
14944 entry = vm_map_first_entry(map);
14945 while (entry != vm_map_to_entry(map)) {
14946 if (entry->is_sub_map) {
14947 goto next;
14948 }
14949 if (! (entry->protection & VM_PROT_WRITE)) {
14950 goto next;
14951 }
14952 object = entry->object.vm_object;
14953 if (object == VM_OBJECT_NULL) {
14954 goto next;
14955 }
14956 if (object->purgable != VM_PURGABLE_VOLATILE) {
14957 goto next;
14958 }
14959
14960 vm_object_lock(object);
14961 #if 00
14962 if (entry->offset != 0 ||
14963 (entry->vme_end - entry->vme_start) != object->vo_size) {
14964 vm_object_unlock(object);
14965 goto next;
14966 }
14967 #endif
14968 next_address = entry->vme_end;
14969 vm_map_unlock_read(map);
14970 state = VM_PURGABLE_EMPTY;
14971 kr = vm_object_purgable_control(object,
14972 VM_PURGABLE_SET_STATE,
14973 &state);
14974 if (kr == KERN_SUCCESS) {
14975 num_object_purged++;
14976 }
14977 vm_object_unlock(object);
14978
14979 vm_map_lock_read(map);
14980 if (vm_map_lookup_entry(map, next_address, &entry)) {
14981 continue;
14982 }
14983 next:
14984 entry = entry->vme_next;
14985 }
14986 vm_map_unlock_read(map);
14987
14988 return num_object_purged;
14989 }
14990
14991 kern_return_t
14992 vm_map_query_volatile(
14993 vm_map_t map,
14994 mach_vm_size_t *volatile_virtual_size_p,
14995 mach_vm_size_t *volatile_resident_size_p,
14996 mach_vm_size_t *volatile_pmap_size_p)
14997 {
14998 mach_vm_size_t volatile_virtual_size;
14999 mach_vm_size_t volatile_resident_count;
15000 mach_vm_size_t volatile_pmap_count;
15001 mach_vm_size_t resident_count;
15002 vm_map_entry_t entry;
15003 vm_object_t object;
15004
15005 /* map should be locked by caller */
15006
15007 volatile_virtual_size = 0;
15008 volatile_resident_count = 0;
15009 volatile_pmap_count = 0;
15010
15011 for (entry = vm_map_first_entry(map);
15012 entry != vm_map_to_entry(map);
15013 entry = entry->vme_next) {
15014 if (entry->is_sub_map) {
15015 continue;
15016 }
15017 if (! (entry->protection & VM_PROT_WRITE)) {
15018 continue;
15019 }
15020 object = entry->object.vm_object;
15021 if (object == VM_OBJECT_NULL) {
15022 continue;
15023 }
15024 if (object->purgable != VM_PURGABLE_VOLATILE) {
15025 continue;
15026 }
15027 if (entry->offset != 0) {
15028 /*
15029 * If the map entry has been split and the object now
15030 * appears several times in the VM map, we don't want
15031 * to count the object's resident_page_count more than
15032 * once. We count it only for the first one, starting
15033 * at offset 0 and ignore the other VM map entries.
15034 */
15035 continue;
15036 }
15037 resident_count = object->resident_page_count;
15038 if ((entry->offset / PAGE_SIZE) >= resident_count) {
15039 resident_count = 0;
15040 } else {
15041 resident_count -= (entry->offset / PAGE_SIZE);
15042 }
15043
15044 volatile_virtual_size += entry->vme_end - entry->vme_start;
15045 volatile_resident_count += resident_count;
15046 volatile_pmap_count += pmap_query_resident(map->pmap,
15047 entry->vme_start,
15048 entry->vme_end);
15049 }
15050
15051 /* map is still locked on return */
15052
15053 *volatile_virtual_size_p = volatile_virtual_size;
15054 *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
15055 *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
15056
15057 return KERN_SUCCESS;
15058 }
15059
15060 #if VM_SCAN_FOR_SHADOW_CHAIN
15061 int vm_map_shadow_max(vm_map_t map);
15062 int vm_map_shadow_max(
15063 vm_map_t map)
15064 {
15065 int shadows, shadows_max;
15066 vm_map_entry_t entry;
15067 vm_object_t object, next_object;
15068
15069 if (map == NULL)
15070 return 0;
15071
15072 shadows_max = 0;
15073
15074 vm_map_lock_read(map);
15075
15076 for (entry = vm_map_first_entry(map);
15077 entry != vm_map_to_entry(map);
15078 entry = entry->vme_next) {
15079 if (entry->is_sub_map) {
15080 continue;
15081 }
15082 object = entry->object.vm_object;
15083 if (object == NULL) {
15084 continue;
15085 }
15086 vm_object_lock_shared(object);
15087 for (shadows = 0;
15088 object->shadow != NULL;
15089 shadows++, object = next_object) {
15090 next_object = object->shadow;
15091 vm_object_lock_shared(next_object);
15092 vm_object_unlock(object);
15093 }
15094 vm_object_unlock(object);
15095 if (shadows > shadows_max) {
15096 shadows_max = shadows;
15097 }
15098 }
15099
15100 vm_map_unlock_read(map);
15101
15102 return shadows_max;
15103 }
15104 #endif /* VM_SCAN_FOR_SHADOW_CHAIN */