osfmk/vm/vm_map.c

   1 /*
   2  * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * @OSF_COPYRIGHT@
  30  */
  31 /*
  32  * Mach Operating System
  33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
  34  * All Rights Reserved.
  35  *
  36  * Permission to use, copy, modify and distribute this software and its
  37  * documentation is hereby granted, provided that both the copyright
  38  * notice and this permission notice appear in all copies of the
  39  * software, derivative works or modified versions, and any portions
  40  * thereof, and that both notices appear in supporting documentation.
  41  *
  42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
  44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  45  *
  46  * Carnegie Mellon requests users of this software to return to
  47  *
  48  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  49  *  School of Computer Science
  50  *  Carnegie Mellon University
  51  *  Pittsburgh PA 15213-3890
  52  *
  53  * any improvements or extensions that they make and grant Carnegie Mellon
  54  * the rights to redistribute these changes.
  55  */
  56 /*
  57  */
  58 /*
  59  *      File:   vm/vm_map.c
  60  *      Author: Avadis Tevanian, Jr., Michael Wayne Young
  61  *      Date:   1985
  62  *
  63  *      Virtual memory mapping module.
  64  */
  65
  66 #include <task_swapper.h>
  67 #include <mach_assert.h>
  68
  69 #include <vm/vm_options.h>
  70
  71 #include <libkern/OSAtomic.h>
  72
  73 #include <mach/kern_return.h>
  74 #include <mach/port.h>
  75 #include <mach/vm_attributes.h>
  76 #include <mach/vm_param.h>
  77 #include <mach/vm_behavior.h>
  78 #include <mach/vm_statistics.h>
  79 #include <mach/memory_object.h>
  80 #include <mach/mach_vm.h>
  81 #include <machine/cpu_capabilities.h>
  82 #include <mach/sdt.h>
  83
  84 #include <kern/assert.h>
  85 #include <kern/backtrace.h>
  86 #include <kern/counters.h>
  87 #include <kern/kalloc.h>
  88 #include <kern/zalloc.h>
  89
  90 #include <vm/cpm.h>
  91 #include <vm/vm_compressor_pager.h>
  92 #include <vm/vm_init.h>
  93 #include <vm/vm_fault.h>
  94 #include <vm/vm_map.h>
  95 #include <vm/vm_object.h>
  96 #include <vm/vm_page.h>
  97 #include <vm/vm_pageout.h>
  98 #include <vm/vm_kern.h>
  99 #include <ipc/ipc_port.h>
 100 #include <kern/sched_prim.h>
 101 #include <kern/misc_protos.h>
 102 #include <kern/xpr.h>
 103
 104 #include <mach/vm_map_server.h>
 105 #include <mach/mach_host_server.h>
 106 #include <vm/vm_protos.h>
 107 #include <vm/vm_purgeable_internal.h>
 108
 109 #include <vm/vm_protos.h>
 110 #include <vm/vm_shared_region.h>
 111 #include <vm/vm_map_store.h>
 112
 113 #include <san/kasan.h>
 114
 115 #if __arm64__
 116 extern int fourk_binary_compatibility_unsafe;
 117 extern int fourk_binary_compatibility_allow_wx;
 118 #endif /* __arm64__ */
 119 extern int proc_selfpid(void);
 120 extern char *proc_name_address(void *p);
 121
 122 #if VM_MAP_DEBUG_APPLE_PROTECT
 123 int vm_map_debug_apple_protect = 0;
 124 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
 125 #if VM_MAP_DEBUG_FOURK
 126 int vm_map_debug_fourk = 0;
 127 #endif /* VM_MAP_DEBUG_FOURK */
 128
 129 int vm_map_executable_immutable = 0;
 130 int vm_map_executable_immutable_no_log = 0;
 131
 132 extern u_int32_t random(void);  /* from <libkern/libkern.h> */
 133 /* Internal prototypes
 134  */
 135
 136 static void vm_map_simplify_range(
 137         vm_map_t        map,
 138         vm_map_offset_t start,
 139         vm_map_offset_t end);   /* forward */
 140
 141 static boolean_t        vm_map_range_check(
 142         vm_map_t        map,
 143         vm_map_offset_t start,
 144         vm_map_offset_t end,
 145         vm_map_entry_t  *entry);
 146
 147 static vm_map_entry_t   _vm_map_entry_create(
 148         struct vm_map_header    *map_header, boolean_t map_locked);
 149
 150 static void             _vm_map_entry_dispose(
 151         struct vm_map_header    *map_header,
 152         vm_map_entry_t          entry);
 153
 154 static void             vm_map_pmap_enter(
 155         vm_map_t                map,
 156         vm_map_offset_t         addr,
 157         vm_map_offset_t         end_addr,
 158         vm_object_t             object,
 159         vm_object_offset_t      offset,
 160         vm_prot_t               protection);
 161
 162 static void             _vm_map_clip_end(
 163         struct vm_map_header    *map_header,
 164         vm_map_entry_t          entry,
 165         vm_map_offset_t         end);
 166
 167 static void             _vm_map_clip_start(
 168         struct vm_map_header    *map_header,
 169         vm_map_entry_t          entry,
 170         vm_map_offset_t         start);
 171
 172 static void             vm_map_entry_delete(
 173         vm_map_t        map,
 174         vm_map_entry_t  entry);
 175
 176 static kern_return_t    vm_map_delete(
 177         vm_map_t        map,
 178         vm_map_offset_t start,
 179         vm_map_offset_t end,
 180         int             flags,
 181         vm_map_t        zap_map);
 182
 183 static kern_return_t    vm_map_copy_overwrite_unaligned(
 184         vm_map_t        dst_map,
 185         vm_map_entry_t  entry,
 186         vm_map_copy_t   copy,
 187         vm_map_address_t start,
 188         boolean_t       discard_on_success);
 189
 190 static kern_return_t    vm_map_copy_overwrite_aligned(
 191         vm_map_t        dst_map,
 192         vm_map_entry_t  tmp_entry,
 193         vm_map_copy_t   copy,
 194         vm_map_offset_t start,
 195         pmap_t          pmap);
 196
 197 static kern_return_t    vm_map_copyin_kernel_buffer(
 198         vm_map_t        src_map,
 199         vm_map_address_t src_addr,
 200         vm_map_size_t   len,
 201         boolean_t       src_destroy,
 202         vm_map_copy_t   *copy_result);  /* OUT */
 203
 204 static kern_return_t    vm_map_copyout_kernel_buffer(
 205         vm_map_t        map,
 206         vm_map_address_t *addr, /* IN/OUT */
 207         vm_map_copy_t   copy,
 208         vm_map_size_t   copy_size,
 209         boolean_t       overwrite,
 210         boolean_t       consume_on_success);
 211
 212 static void             vm_map_fork_share(
 213         vm_map_t        old_map,
 214         vm_map_entry_t  old_entry,
 215         vm_map_t        new_map);
 216
 217 static boolean_t        vm_map_fork_copy(
 218         vm_map_t        old_map,
 219         vm_map_entry_t  *old_entry_p,
 220         vm_map_t        new_map,
 221         int             vm_map_copyin_flags);
 222
 223 void            vm_map_region_top_walk(
 224         vm_map_entry_t             entry,
 225         vm_region_top_info_t       top);
 226
 227 void            vm_map_region_walk(
 228         vm_map_t                   map,
 229         vm_map_offset_t            va,
 230         vm_map_entry_t             entry,
 231         vm_object_offset_t         offset,
 232         vm_object_size_t           range,
 233         vm_region_extended_info_t  extended,
 234         boolean_t                  look_for_pages,
 235         mach_msg_type_number_t count);
 236
 237 static kern_return_t    vm_map_wire_nested(
 238         vm_map_t                   map,
 239         vm_map_offset_t            start,
 240         vm_map_offset_t            end,
 241         vm_prot_t                  caller_prot,
 242         vm_tag_t                   tag,
 243         boolean_t                  user_wire,
 244         pmap_t                     map_pmap,
 245         vm_map_offset_t            pmap_addr,
 246         ppnum_t                    *physpage_p);
 247
 248 static kern_return_t    vm_map_unwire_nested(
 249         vm_map_t                   map,
 250         vm_map_offset_t            start,
 251         vm_map_offset_t            end,
 252         boolean_t                  user_wire,
 253         pmap_t                     map_pmap,
 254         vm_map_offset_t            pmap_addr);
 255
 256 static kern_return_t    vm_map_overwrite_submap_recurse(
 257         vm_map_t                   dst_map,
 258         vm_map_offset_t            dst_addr,
 259         vm_map_size_t              dst_size);
 260
 261 static kern_return_t    vm_map_copy_overwrite_nested(
 262         vm_map_t                   dst_map,
 263         vm_map_offset_t            dst_addr,
 264         vm_map_copy_t              copy,
 265         boolean_t                  interruptible,
 266         pmap_t                     pmap,
 267         boolean_t                  discard_on_success);
 268
 269 static kern_return_t    vm_map_remap_extract(
 270         vm_map_t                map,
 271         vm_map_offset_t         addr,
 272         vm_map_size_t           size,
 273         boolean_t               copy,
 274         struct vm_map_header    *map_header,
 275         vm_prot_t               *cur_protection,
 276         vm_prot_t               *max_protection,
 277         vm_inherit_t            inheritance,
 278         boolean_t               pageable,
 279         boolean_t               same_map,
 280         vm_map_kernel_flags_t   vmk_flags);
 281
 282 static kern_return_t    vm_map_remap_range_allocate(
 283         vm_map_t                map,
 284         vm_map_address_t        *address,
 285         vm_map_size_t           size,
 286         vm_map_offset_t         mask,
 287         int                     flags,
 288         vm_map_kernel_flags_t   vmk_flags,
 289         vm_tag_t                tag,
 290         vm_map_entry_t          *map_entry);
 291
 292 static void             vm_map_region_look_for_page(
 293         vm_map_t                   map,
 294         vm_map_offset_t            va,
 295         vm_object_t                object,
 296         vm_object_offset_t         offset,
 297         int                        max_refcnt,
 298         int                        depth,
 299         vm_region_extended_info_t  extended,
 300         mach_msg_type_number_t count);
 301
 302 static int              vm_map_region_count_obj_refs(
 303         vm_map_entry_t             entry,
 304         vm_object_t                object);
 305
 306
 307 static kern_return_t    vm_map_willneed(
 308         vm_map_t        map,
 309         vm_map_offset_t start,
 310         vm_map_offset_t end);
 311
 312 static kern_return_t    vm_map_reuse_pages(
 313         vm_map_t        map,
 314         vm_map_offset_t start,
 315         vm_map_offset_t end);
 316
 317 static kern_return_t    vm_map_reusable_pages(
 318         vm_map_t        map,
 319         vm_map_offset_t start,
 320         vm_map_offset_t end);
 321
 322 static kern_return_t    vm_map_can_reuse(
 323         vm_map_t        map,
 324         vm_map_offset_t start,
 325         vm_map_offset_t end);
 326
 327 #if MACH_ASSERT
 328 static kern_return_t    vm_map_pageout(
 329         vm_map_t        map,
 330         vm_map_offset_t start,
 331         vm_map_offset_t end);
 332 #endif /* MACH_ASSERT */
 333
 334 pid_t find_largest_process_vm_map_entries(void);
 335
 336 /*
 337  * Macros to copy a vm_map_entry. We must be careful to correctly
 338  * manage the wired page count. vm_map_entry_copy() creates a new
 339  * map entry to the same memory - the wired count in the new entry
 340  * must be set to zero. vm_map_entry_copy_full() creates a new
 341  * entry that is identical to the old entry.  This preserves the
 342  * wire count; it's used for map splitting and zone changing in
 343  * vm_map_copyout.
      *
      * Both macros read (NEW)->from_reserved_zone before the structure
      * assignment and store it back afterwards, so the destination entry
      * keeps its own value of that flag instead of inheriting OLD's.
      *
      * Besides wired_count, vm_map_entry_copy() also resets in NEW:
      * is_shared, needs_wakeup, in_transition, user_wired_count, permanent,
      * used_for_jit, vme_resilient_codesign, vme_resilient_media and
      * vme_atomic.  If the copied entry has iokit_acct set, the macro
      * asserts that use_pmap is clear, then clears iokit_acct and sets
      * use_pmap in NEW.
      *
      * NEW is expanded several times in both macros (and OLD twice in
      * vm_map_entry_copy()), so pass expressions without side effects.
 344  */
 345
 346 #define vm_map_entry_copy(NEW,OLD)      \
 347 MACRO_BEGIN                             \
 348 boolean_t _vmec_reserved = (NEW)->from_reserved_zone;   \
 349         *(NEW) = *(OLD);                \
 350         (NEW)->is_shared = FALSE;       \
 351         (NEW)->needs_wakeup = FALSE;    \
 352         (NEW)->in_transition = FALSE;   \
 353         (NEW)->wired_count = 0;         \
 354         (NEW)->user_wired_count = 0;    \
 355         (NEW)->permanent = FALSE;       \
 356         (NEW)->used_for_jit = FALSE;    \
 357         (NEW)->from_reserved_zone = _vmec_reserved;     \
 358         if ((NEW)->iokit_acct) {                        \
 359              assertf(!(NEW)->use_pmap, "old %p new %p\n", (OLD), (NEW)); \
 360              (NEW)->iokit_acct = FALSE;                 \
 361              (NEW)->use_pmap = TRUE;                    \
 362         }                                               \
 363         (NEW)->vme_resilient_codesign = FALSE; \
 364         (NEW)->vme_resilient_media = FALSE;     \
 365         (NEW)->vme_atomic = FALSE;      \
 366 MACRO_END
 367
 368 #define vm_map_entry_copy_full(NEW,OLD)                 \
 369 MACRO_BEGIN                                             \
 370 boolean_t _vmecf_reserved = (NEW)->from_reserved_zone;  \
 371 (*(NEW) = *(OLD));                                      \
 372 (NEW)->from_reserved_zone = _vmecf_reserved;                    \
 373 MACRO_END
 374
 375 /*
 376  *      Decide if we want to allow processes to execute from their data or stack areas.
 377  *      override_nx() returns true if we do.  Data/stack execution can be enabled independently
 378  *      for 32 and 64 bit processes.  Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
 379  *      or allow_stack_exec to enable data execution for that type of data area for that particular
 380  *      ABI (or both by or'ing the flags together).  These are initialized in the architecture
 381  *      specific pmap files since the default behavior varies according to architecture.  The
 382  *      main reason it varies is because of the need to provide binary compatibility with old
 383  *      applications that were written before these restrictions came into being.  In the old
 384  *      days, an app could execute anything it could read, but this has slowly been tightened
 385  *      up over time.  The default behavior is:
 386  *
 387  *      32-bit PPC apps         may execute from both stack and data areas
 388  *      32-bit Intel apps       may execute from data areas but not stack
 389  *      64-bit PPC/Intel apps   may not execute from either data or stack
 390  *
 391  *      An application on any architecture may override these defaults by explicitly
 392  *      adding PROT_EXEC permission to the page in question with the mprotect(2)
 393  *      system call.  This code here just determines what happens when an app tries to
 394  *      execute from a page that lacks execute permission.
 395  *
 396  *      Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
 397  *      default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
 398  *      a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
 399  *      execution from data areas for a particular binary even if the arch normally permits it. As
 400  *      a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
 401  *      to support some complicated use cases, notably browsers with out-of-process plugins that
 402  *      are not all NX-safe.
      *
      *      Return value, as implemented below:
      *        - FALSE when the map's pmap is kernel_pmap;
      *        - for user_tag == VM_MEMORY_STACK, allow_stack_exec masked with the
      *          map's ABI flag (non-zero means "allow", not necessarily 1);
      *        - for any other tag, 1 only if allow_data_exec has the map's ABI
      *          flag set and map->map_disallow_data_exec is FALSE, else 0.
 403  */
 404
 405 extern int allow_data_exec, allow_stack_exec;
 406
 407 int
 408 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
 409 {
 410         int current_abi;
 411
             /* Never override for a map that uses the kernel pmap. */
 412         if (map->pmap == kernel_pmap) return FALSE;
 413
 414         /*
 415          * Determine if the app is running in 32 or 64 bit mode.
 416          */
 417
 418         if (vm_map_is_64bit(map))
 419                 current_abi = VM_ABI_64;
 420         else
 421                 current_abi = VM_ABI_32;
 422
 423         /*
 424          * Determine if we should allow the execution based on whether it's a
 425          * stack or data area and the current architecture.
 426          */
 427
 428         if (user_tag == VM_MEMORY_STACK)
 429                 return allow_stack_exec & current_abi;
 430
             /* Every non-stack tag is treated as a data area. */
 431         return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
 432 }
 433
 434
 435 /*
 436  *      Virtual memory maps provide for the mapping, protection,
 437  *      and sharing of virtual memory objects.  In addition,
 438  *      this module provides for an efficient virtual copy of
 439  *      memory from one map to another.
 440  *
 441  *      Synchronization is required prior to most operations.
 442  *
 443  *      Maps consist of an ordered doubly-linked list of simple
 444  *      entries; a single hint is used to speed up lookups.
 445  *
 446  *      Sharing maps have been deleted from this version of Mach.
 447  *      All shared objects are now mapped directly into the respective
 448  *      maps.  This requires a change in the copy on write strategy;
 449  *      the asymmetric (delayed) strategy is used for shared temporary
 450  *      objects instead of the symmetric (shadow) strategy.  All maps
 451  *      are now "top level" maps (either task map, kernel map or submap
 452  *      of the kernel map).
 453  *
 454  *      Since portions of maps are specified by start/end addresses,
 455  *      which may not align with existing map entries, all
 456  *      routines merely "clip" entries to these start/end values.
 457  *      [That is, an entry is split into two, bordering at a
 458  *      start or end value.]  Note that these clippings may not
 459  *      always be necessary (as the two resulting entries are then
 460  *      not changed); however, the clipping is done for convenience.
 461  *      No attempt is currently made to "glue back together" two
 462  *      abutting entries.
 463  *
 464  *      The symmetric (shadow) copy strategy implements virtual copy
 465  *      by copying VM object references from one map to
 466  *      another, and then marking both regions as copy-on-write.
 467  *      It is important to note that only one writeable reference
 468  *      to a VM object region exists in any map when this strategy
 469  *      is used -- this means that shadow object creation can be
 470  *      delayed until a write operation occurs.  The asymmetric (delayed)
 471  *      strategy allows multiple maps to have writeable references to
 472  *      the same region of a vm object, and hence cannot delay creating
 473  *      its copy objects.  See vm_object_copy_quickly() in vm_object.c.
 474  *      Copying of permanent objects is completely different; see
 475  *      vm_object_copy_strategically() in vm_object.c.
 476  */
 477
 478 static zone_t   vm_map_zone;                            /* zone for vm_map structures */
 479 zone_t                  vm_map_entry_zone;                      /* zone for vm_map_entry structures */
 480 static zone_t   vm_map_entry_reserved_zone;     /* zone with reserve for non-blocking allocations */
 481 static zone_t   vm_map_copy_zone;                       /* zone for vm_map_copy structures */
 482 zone_t                  vm_map_holes_zone;                      /* zone for vm map holes (vm_map_links) structures */
 483
 484
 485 /*
 486  *      Placeholder object for submap operations.  This object is dropped
 487  *      into the range by a call to vm_map_find, and removed when
 488  *      vm_map_submap creates the submap.
 489  */
 490
 491 vm_object_t     vm_submap_object;
 492
 493 static void             *map_data;
 494 static vm_size_t        map_data_size;
 495 static void             *kentry_data;
 496 static vm_size_t        kentry_data_size;
 497 static void             *map_holes_data;
 498 static vm_size_t        map_holes_data_size;
 499
 500 #if CONFIG_EMBEDDED
 501 #define         NO_COALESCE_LIMIT  0
 502 #else
 503 #define         NO_COALESCE_LIMIT  ((1024 * 128) - 1)
 504 #endif
 505
 506 /* Skip acquiring locks if we're in the midst of a kernel core dump */
 507 unsigned int not_in_kdp = 1;
 508
 509 unsigned int vm_map_set_cache_attr_count = 0;
 510
     /*
      * vm_map_set_cache_attr:
      * Marks the VM object mapped at address "va" in "map" by setting its
      * set_cache_attr flag to TRUE.
      *
      * The map is read-locked for the duration of the call; the object is
      * locked only around the flag update.
      *
      * Returns KERN_INVALID_ARGUMENT if "va" is not covered by a map entry,
      * if the entry is a submap, or if the entry has no VM object.
      * Returns KERN_SUCCESS otherwise, after bumping
      * vm_map_set_cache_attr_count.
      *
      * NOTE(review): the counter is incremented with a plain "++" while only
      * the map's read lock is held, so concurrent callers could race on it;
      * presumably it is a best-effort statistic -- confirm.
      */
 511 kern_return_t
 512 vm_map_set_cache_attr(
 513         vm_map_t        map,
 514         vm_map_offset_t va)
 515 {
 516         vm_map_entry_t  map_entry;
 517         vm_object_t     object;
 518         kern_return_t   kr = KERN_SUCCESS;
 519
 520         vm_map_lock_read(map);
 521
 522         if (!vm_map_lookup_entry(map, va, &map_entry) ||
 523             map_entry->is_sub_map) {
 524                 /*
 525                  * that memory is not properly mapped
 526                  */
 527                 kr = KERN_INVALID_ARGUMENT;
 528                 goto done;
 529         }
 530         object = VME_OBJECT(map_entry);
 531
 532         if (object == VM_OBJECT_NULL) {
 533                 /*
 534                  * there should be a VM object here at this point
 535                  */
 536                 kr = KERN_INVALID_ARGUMENT;
 537                 goto done;
 538         }
 539         vm_object_lock(object);
 540         object->set_cache_attr = TRUE;
 541         vm_object_unlock(object);
 542
 543         vm_map_set_cache_attr_count++;
 544 done:
             /* single exit point: every path drops the map read lock here */
 545         vm_map_unlock_read(map);
 546
 547         return kr;
 548 }
 549
 550
 551 #if CONFIG_CODE_DECRYPTION
 552 /*
 553  * vm_map_apple_protected:
 554  * This remaps the requested part of the object with an object backed by
 555  * the decrypting pager.
 556  * crypt_info contains entry points and session data for the crypt module.
 557  * The crypt_info block will be copied by vm_map_apple_protected. The data structures
 558  * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
      *
      * Parameters:
      *   map                    map in which the range is remapped
      *   start, end             range to cover; both are widened to page
      *                          boundaries (PAGE_MASK_64, then the map's own
      *                          page mask) before map entries are clipped
      *   crypto_backing_offset  offset handed to apple_protect_pager_setup();
      *                          (vm_object_offset_t)-1 means "use the offset
      *                          of the map entry being processed"
      *   crypt_info             passed through to apple_protect_pager_setup()
      *
      * Returns:
      *   KERN_INVALID_ARGUMENT  an address in the range has no map entry, or
      *                          its entry is a submap, has no VM object, or
      *                          lacks VM_PROT_EXECUTE
      *   KERN_FAILURE           apple_protect_pager_setup() returned NULL
      *   KERN_SUCCESS           the loop ran to completion
      *
      * The map lock is taken and dropped once per map entry; it is not held
      * across the pager setup or the vm_map_enter_mem_object() call.
 559  */
 560 kern_return_t
 561 vm_map_apple_protected(
 562         vm_map_t                map,
 563         vm_map_offset_t         start,
 564         vm_map_offset_t         end,
 565         vm_object_offset_t      crypto_backing_offset,
 566         struct pager_crypt_info *crypt_info)
 567 {
 568         boolean_t       map_locked;
 569         kern_return_t   kr;
 570         vm_map_entry_t  map_entry;
 571         struct vm_map_entry tmp_entry;
 572         memory_object_t unprotected_mem_obj;
 573         vm_object_t     protected_object;
 574         vm_map_offset_t map_addr;
 575         vm_map_offset_t start_aligned, end_aligned;
 576         vm_object_offset_t      crypto_start, crypto_end;
 577         int             vm_flags;
 578         vm_map_kernel_flags_t vmk_flags;
 579
 580         vm_flags = 0;
 581         vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
 582
 583         map_locked = FALSE;
 584         unprotected_mem_obj = MEMORY_OBJECT_NULL;
 585
 586         start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
 587         end_aligned = vm_map_round_page(end, PAGE_MASK_64);
 588         start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
 589         end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));
 590
 591 #if __arm64__
 592         /*
 593          * "start" and "end" might be 4K-aligned but not 16K-aligned,
 594          * so we might have to loop and establish up to 3 mappings:
 595          *
 596          * + the first 16K-page, which might overlap with the previous
 597          *   4K-aligned mapping,
 598          * + the center,
 599          * + the last 16K-page, which might overlap with the next
 600          *   4K-aligned mapping.
 601          * Each of these mapping might be backed by a vnode pager (if
 602          * properly page-aligned) or a "fourk_pager", itself backed by a
 603          * vnode pager (if 4K-aligned but not page-aligned).
 604          */
 605 #else /* __arm64__ */
 606         assert(start_aligned == start);
 607         assert(end_aligned == end);
 608 #endif /* __arm64__ */
 609
             /*
              * NOTE(review): this assignment is repeated by the for-loop
              * initializer right below, so it is redundant.
              */
 610         map_addr = start_aligned;
             /*
              * Each pass handles one map entry and resumes at the end of the
              * entry it just processed (tmp_entry.vme_end).
              */
 611         for (map_addr = start_aligned;
 612              map_addr < end;
 613              map_addr = tmp_entry.vme_end) {
 614                 vm_map_lock(map);
 615                 map_locked = TRUE;
 616
 617                 /* lookup the protected VM object */
 618                 if (!vm_map_lookup_entry(map,
 619                                          map_addr,
 620                                          &map_entry) ||
 621                     map_entry->is_sub_map ||
 622                     VME_OBJECT(map_entry) == VM_OBJECT_NULL ||
 623                     !(map_entry->protection & VM_PROT_EXECUTE)) {
 624                         /* that memory is not properly mapped */
 625                         kr = KERN_INVALID_ARGUMENT;
 626                         goto done;
 627                 }
 628
 629                 /* get the protected object to be decrypted */
 630                 protected_object = VME_OBJECT(map_entry);
                     /*
                      * NOTE(review): the lookup condition above already bails
                      * out when VME_OBJECT(map_entry) is VM_OBJECT_NULL and the
                      * map has stayed locked since, so this branch looks
                      * unreachable.
                      */
 631                 if (protected_object == VM_OBJECT_NULL) {
 632                         /* there should be a VM object here at this point */
 633                         kr = KERN_INVALID_ARGUMENT;
 634                         goto done;
 635                 }
 636                 /* ensure protected object stays alive while map is unlocked */
 637                 vm_object_reference(protected_object);
 638
 639                 /* limit the map entry to the area we want to cover */
 640                 vm_map_clip_start(map, map_entry, start_aligned);
 641                 vm_map_clip_end(map, map_entry, end_aligned);
 642
                     /* work from a private snapshot of the entry once the map is unlocked */
 643                 tmp_entry = *map_entry;
 644                 map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
 645                 vm_map_unlock(map);
 646                 map_locked = FALSE;
 647
 648                 /*
 649                  * This map entry might be only partially encrypted
 650                  * (if not fully "page-aligned").
 651                  */
                     /*
                      * crypto_start/crypto_end are offsets relative to the start
                      * of this entry.
                      *
                      * NOTE(review): the KERN_INVALID_ADDRESS stores below are
                      * not followed by a "goto done", and kr is assigned again
                      * (KERN_FAILURE path or vm_map_enter_mem_object() result)
                      * before it is next read, so they currently have no effect.
                      * Confirm whether an early exit was intended.
                      */
 652                 crypto_start = 0;
 653                 crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
 654                 if (tmp_entry.vme_start < start) {
 655                         if (tmp_entry.vme_start != start_aligned) {
 656                                 kr = KERN_INVALID_ADDRESS;
 657                         }
 658                         crypto_start += (start - tmp_entry.vme_start);
 659                 }
 660                 if (tmp_entry.vme_end > end) {
 661                         if (tmp_entry.vme_end != end_aligned) {
 662                                 kr = KERN_INVALID_ADDRESS;
 663                         }
 664                         crypto_end -= (tmp_entry.vme_end - end);
 665                 }
 666
 667                 /*
 668                  * This "extra backing offset" is needed to get the decryption
 669                  * routine to use the right key.  It adjusts for the possibly
 670                  * relative offset of an interposed "4K" pager...
 671                  */
 672                 if (crypto_backing_offset == (vm_object_offset_t) -1) {
 673                         crypto_backing_offset = VME_OFFSET(&tmp_entry);
 674                 }
 675
 676                 /*
 677                  * Lookup (and create if necessary) the protected memory object
 678                  * matching that VM object.
 679                  * If successful, this also grabs a reference on the memory object,
 680                  * to guarantee that it doesn't go away before we get a chance to map
 681                  * it.
 682                  */
 683                 unprotected_mem_obj = apple_protect_pager_setup(
 684                         protected_object,
 685                         VME_OFFSET(&tmp_entry),
 686                         crypto_backing_offset,
 687                         crypt_info,
 688                         crypto_start,
 689                         crypto_end);
 690
 691                 /* release extra ref on protected object */
 692                 vm_object_deallocate(protected_object);
 693
 694                 if (unprotected_mem_obj == NULL) {
 695                         kr = KERN_FAILURE;
 696                         goto done;
 697                 }
 698
 699                 vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
 700                 /* can overwrite an immutable mapping */
 701                 vmk_flags.vmkf_overwrite_immutable = TRUE;
 702 #if __arm64__
 703                 if (tmp_entry.used_for_jit &&
 704                     (VM_MAP_PAGE_SHIFT(map) != FOURK_PAGE_SHIFT ||
 705                      PAGE_SHIFT != FOURK_PAGE_SHIFT) &&
 706                     fourk_binary_compatibility_unsafe &&
 707                     fourk_binary_compatibility_allow_wx) {
 708                         printf("** FOURK_COMPAT [%d]: "
 709                                "allowing write+execute at 0x%llx\n",
 710                                proc_selfpid(), tmp_entry.vme_start);
 711                         vmk_flags.vmkf_map_jit = TRUE;
 712                 }
 713 #endif /* __arm64__ */
 714
 715                 /* map this memory object in place of the current one */
 716                 map_addr = tmp_entry.vme_start;
 717                 kr = vm_map_enter_mem_object(map,
 718                                              &map_addr,
 719                                              (tmp_entry.vme_end -
 720                                               tmp_entry.vme_start),
 721                                              (mach_vm_offset_t) 0,
 722                                              vm_flags,
 723                                              vmk_flags,
 724                                              VM_KERN_MEMORY_NONE,
 725                                              (ipc_port_t) unprotected_mem_obj,
 726                                              0,
 727                                              TRUE,
 728                                              tmp_entry.protection,
 729                                              tmp_entry.max_protection,
 730                                              tmp_entry.inheritance);
                     /*
                      * NOTE(review): the result of vm_map_enter_mem_object() is
                      * only checked through assertf(); kr is not consulted
                      * otherwise and the function returns KERN_SUCCESS once the
                      * loop ends.  If assertf() is compiled out in some
                      * configurations (not visible here), a mapping failure
                      * would go unreported -- confirm.
                      */
 731                 assertf(kr == KERN_SUCCESS,
 732                         "kr = 0x%x\n", kr);
 733                 assertf(map_addr == tmp_entry.vme_start,
 734                         "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
 735                         (uint64_t)map_addr,
 736                         (uint64_t) tmp_entry.vme_start,
 737                         &tmp_entry);
 738
 739 #if VM_MAP_DEBUG_APPLE_PROTECT
                     /*
                      * protected_object's extra reference was dropped above;
                      * only its pointer value is printed here.
                      */
 740                 if (vm_map_debug_apple_protect) {
 741                         printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
 742                                " backing:[object:%p,offset:0x%llx,"
 743                                "crypto_backing_offset:0x%llx,"
 744                                "crypto_start:0x%llx,crypto_end:0x%llx]\n",
 745                                map,
 746                                (uint64_t) map_addr,
 747                                (uint64_t) (map_addr + (tmp_entry.vme_end -
 748                                                        tmp_entry.vme_start)),
 749                                unprotected_mem_obj,
 750                                protected_object,
 751                                VME_OFFSET(&tmp_entry),
 752                                crypto_backing_offset,
 753                                crypto_start,
 754                                crypto_end);
 755                 }
 756 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
 757
 758                 /*
 759                  * Release the reference obtained by
 760                  * apple_protect_pager_setup().
 761                  * The mapping (if it succeeded) is now holding a reference on
 762                  * the memory object.
 763                  */
 764                 memory_object_deallocate(unprotected_mem_obj);
 765                 unprotected_mem_obj = MEMORY_OBJECT_NULL;
 766
 767                 /* continue with next map entry */
                     /* advance the backing offset by this entry's size, less crypto_start */
 768                 crypto_backing_offset += (tmp_entry.vme_end -
 769                                           tmp_entry.vme_start);
 770                 crypto_backing_offset -= crypto_start;
 771         }
 772         kr = KERN_SUCCESS;
 773
 774 done:
             /* error paths taken while the map is still locked unlock it here */
 775         if (map_locked) {
 776                 vm_map_unlock(map);
 777         }
 778         return kr;
 779 }
 780 #endif  /* CONFIG_CODE_DECRYPTION */
 781
 782
 783 lck_grp_t               vm_map_lck_grp;         /* lock group for vm_map locks; initialized as "vm_map" in vm_map_init() */
 784 lck_grp_attr_t  vm_map_lck_grp_attr;    /* attributes of vm_map_lck_grp; set to defaults in vm_map_init() */
 785 lck_attr_t              vm_map_lck_attr;        /* default lock attributes; used for each map's s_lock in vm_map_create() */
 786 lck_attr_t              vm_map_lck_rw_attr;     /* defaults with debug cleared in vm_map_init(); presumably for the map rw lock -- TODO confirm */
 787
 788
 789 /*
 790  *      vm_map_init:
 791  *
 792  *      Initialize the vm_map module.  Must be called before
 793  *      any other vm_map routines.
 794  *
 795  *      Map and entry structures are allocated from zones -- we must
 796  *      initialize those zones.
 797  *
 798  *      There are three zones of interest:
 799  *
 800  *      vm_map_zone:            used to allocate maps.
 801  *      vm_map_entry_zone:      used to allocate map entries.
 802  *      vm_map_entry_reserved_zone:     fallback zone for kernel map entries
 803  *
 804  *      The kernel allocates map entries from a special zone that is initially
 805  *      "crammed" with memory.  It would be difficult (perhaps impossible) for
 806  *      the kernel to allocate more memory to a entry zone when it became
 807  *      empty since the very act of allocating memory implies the creation
 808  *      of a new entry.
 809  */
 810 void
 811 vm_map_init(
 812         void)
 813 {
 814         vm_size_t entry_zone_alloc_size;
 815         const char *mez_name = "VM map entries";
 816
 817         vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
 818                             PAGE_SIZE, "maps");
 819         zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
 820 #if     defined(__LP64__)
 821         entry_zone_alloc_size = PAGE_SIZE * 5;
 822 #else
 823         entry_zone_alloc_size = PAGE_SIZE * 6;
 824 #endif
 825         vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
 826                                   1024*1024, entry_zone_alloc_size,
 827                                   mez_name);
 828         zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
 829         zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
 830         zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);
 831
 832         vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
 833                                    kentry_data_size * 64, kentry_data_size,
 834                                    "Reserved VM map entries");
 835         zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);
 836         /* Don't quarantine because we always need elements available */
 837         zone_change(vm_map_entry_reserved_zone, Z_KASAN_QUARANTINE, FALSE);
 838
 839         vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
 840                                  16*1024, PAGE_SIZE, "VM map copies");
 841         zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);
 842
 843         vm_map_holes_zone = zinit((vm_map_size_t) sizeof(struct vm_map_links),
 844                                  16*1024, PAGE_SIZE, "VM map holes");
 845         zone_change(vm_map_holes_zone, Z_NOENCRYPT, TRUE);
 846
 847         /*
 848          *      Cram the map and kentry zones with initial data.
 849          *      Set reserved_zone non-collectible to aid zone_gc().
 850          */
 851         zone_change(vm_map_zone, Z_COLLECT, FALSE);
 852         zone_change(vm_map_zone, Z_FOREIGN, TRUE);
 853         zone_change(vm_map_zone, Z_GZALLOC_EXEMPT, TRUE);
 854
 855         zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
 856         zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
 857         zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
 858         zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
 859         zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
 860         zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
 861         zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);
 862
 863         zone_change(vm_map_holes_zone, Z_COLLECT, TRUE);
 864         zone_change(vm_map_holes_zone, Z_EXPAND, TRUE);
 865         zone_change(vm_map_holes_zone, Z_FOREIGN, TRUE);
 866         zone_change(vm_map_holes_zone, Z_NOCALLOUT, TRUE);
 867         zone_change(vm_map_holes_zone, Z_CALLERACCT, TRUE);
 868         zone_change(vm_map_holes_zone, Z_GZALLOC_EXEMPT, TRUE);
 869
 870         /*
 871          * Add the stolen memory to zones, adjust zone size and stolen counts.
 872          * zcram only up to the maximum number of pages for each zone chunk.
 873          */
 874         zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
 875
 876         const vm_size_t stride = ZONE_CHUNK_MAXPAGES * PAGE_SIZE;
 877         for (vm_offset_t off = 0; off < kentry_data_size; off += stride) {
 878                 zcram(vm_map_entry_reserved_zone,
 879                                 (vm_offset_t)kentry_data + off,
 880                                 MIN(kentry_data_size - off, stride));
 881         }
 882         for (vm_offset_t off = 0; off < map_holes_data_size; off += stride) {
 883                 zcram(vm_map_holes_zone,
 884                                 (vm_offset_t)map_holes_data + off,
 885                                 MIN(map_holes_data_size - off, stride));
 886         }
 887
 888         VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));
 889
 890         lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
 891         lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
 892         lck_attr_setdefault(&vm_map_lck_attr);
 893
 894         lck_attr_setdefault(&vm_map_lck_rw_attr);
 895         lck_attr_cleardebug(&vm_map_lck_rw_attr);
 896
 897 #if VM_MAP_DEBUG_APPLE_PROTECT
 898         PE_parse_boot_argn("vm_map_debug_apple_protect",
 899                            &vm_map_debug_apple_protect,
 900                            sizeof(vm_map_debug_apple_protect));
 901 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
 902 #if VM_MAP_DEBUG_APPLE_FOURK
 903         PE_parse_boot_argn("vm_map_debug_fourk",
 904                            &vm_map_debug_fourk,
 905                            sizeof(vm_map_debug_fourk));
 906 #endif /* VM_MAP_DEBUG_FOURK */
 907         PE_parse_boot_argn("vm_map_executable_immutable",
 908                            &vm_map_executable_immutable,
 909                            sizeof(vm_map_executable_immutable));
 910         PE_parse_boot_argn("vm_map_executable_immutable_no_log",
 911                            &vm_map_executable_immutable_no_log,
 912                            sizeof(vm_map_executable_immutable_no_log));
 913 }
 914
 915 void
 916 vm_map_steal_memory(
 917         void)
 918 {
 919         uint32_t kentry_initial_pages;
 920
 921         map_data_size = round_page(10 * sizeof(struct _vm_map));
 922         map_data = pmap_steal_memory(map_data_size);
 923
 924         /*
 925          * kentry_initial_pages corresponds to the number of kernel map entries
 926          * required during bootstrap until the asynchronous replenishment
 927          * scheme is activated and/or entries are available from the general
 928          * map entry pool.
 929          */
 930 #if     defined(__LP64__)
 931         kentry_initial_pages = 10;
 932 #else
 933         kentry_initial_pages = 6;
 934 #endif
 935
 936 #if CONFIG_GZALLOC
 937         /* If using the guard allocator, reserve more memory for the kernel
 938          * reserved map entry pool.
 939         */
 940         if (gzalloc_enabled())
 941                 kentry_initial_pages *= 1024;
 942 #endif
 943
 944         kentry_data_size = kentry_initial_pages * PAGE_SIZE;
 945         kentry_data = pmap_steal_memory(kentry_data_size);
 946
 947         map_holes_data_size = kentry_data_size;
 948         map_holes_data = pmap_steal_memory(map_holes_data_size);
 949 }
 950
 951 boolean_t vm_map_supports_hole_optimization = FALSE;
 952
 953 void
 954 vm_kernel_reserved_entry_init(void) {
 955         zone_prio_refill_configure(vm_map_entry_reserved_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_entry));
 956
 957         /*
 958          * Once we have our replenish thread set up, we can start using the vm_map_holes zone.
 959          */
 960         zone_prio_refill_configure(vm_map_holes_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_links));
 961         vm_map_supports_hole_optimization = TRUE;
 962 }
 963
 964 void
 965 vm_map_disable_hole_optimization(vm_map_t map)
 966 {
 967         vm_map_entry_t  head_entry, hole_entry, next_hole_entry;
 968
 969         if (map->holelistenabled) {
 970
 971                 head_entry = hole_entry = (vm_map_entry_t) map->holes_list;
 972
 973                 while (hole_entry != NULL) {
 974
 975                         next_hole_entry = hole_entry->vme_next;
 976
 977                         hole_entry->vme_next = NULL;
 978                         hole_entry->vme_prev = NULL;
 979                         zfree(vm_map_holes_zone, hole_entry);
 980
 981                         if (next_hole_entry == head_entry) {
 982                                 hole_entry = NULL;
 983                         } else {
 984                                 hole_entry = next_hole_entry;
 985                         }
 986                 }
 987
 988                 map->holes_list = NULL;
 989                 map->holelistenabled = FALSE;
 990
 991                 map->first_free = vm_map_first_entry(map);
 992                 SAVE_HINT_HOLE_WRITE(map, NULL);
 993         }
 994 }
 995
 996 boolean_t
 997 vm_kernel_map_is_kernel(vm_map_t map) {
 998         return (map->pmap == kernel_pmap);
 999 }
1000
1001 /*
1002  *      vm_map_create:
1003  *
1004  *      Creates and returns a new empty VM map with
1005  *      the given physical map structure, and having
1006  *      the given lower and upper address bounds.
1007  */
1008
1009 vm_map_t
1010 vm_map_create(
1011         pmap_t                  pmap,
1012         vm_map_offset_t min,
1013         vm_map_offset_t max,
1014         boolean_t               pageable)
1015 {
1016         static int              color_seed = 0;
1017         vm_map_t        result;
1018         struct vm_map_links     *hole_entry = NULL;
1019
1020         result = (vm_map_t) zalloc(vm_map_zone);
1021         if (result == VM_MAP_NULL)
1022                 panic("vm_map_create");
1023
1024         vm_map_first_entry(result) = vm_map_to_entry(result);
1025         vm_map_last_entry(result)  = vm_map_to_entry(result);
1026         result->hdr.nentries = 0;
1027         result->hdr.entries_pageable = pageable;
1028
1029         vm_map_store_init( &(result->hdr) );
1030
1031         result->hdr.page_shift = PAGE_SHIFT;
1032
1033         result->size = 0;
1034         result->user_wire_limit = MACH_VM_MAX_ADDRESS;  /* default limit is unlimited */
1035         result->user_wire_size  = 0;
1036 #if __x86_64__
1037         result->vmmap_high_start = 0;
1038 #endif /* __x86_64__ */
1039         result->ref_count = 1;
1040 #if     TASK_SWAPPER
1041         result->res_count = 1;
1042         result->sw_state = MAP_SW_IN;
1043 #endif  /* TASK_SWAPPER */
1044         result->pmap = pmap;
1045         result->min_offset = min;
1046         result->max_offset = max;
1047         result->wiring_required = FALSE;
1048         result->no_zero_fill = FALSE;
1049         result->mapped_in_other_pmaps = FALSE;
1050         result->wait_for_space = FALSE;
1051         result->switch_protect = FALSE;
1052         result->disable_vmentry_reuse = FALSE;
1053         result->map_disallow_data_exec = FALSE;
1054         result->is_nested_map = FALSE;
1055         result->highest_entry_end = 0;
1056         result->first_free = vm_map_to_entry(result);
1057         result->hint = vm_map_to_entry(result);
1058         result->color_rr = (color_seed++) & vm_color_mask;
1059         result->jit_entry_exists = FALSE;
1060
1061         if (vm_map_supports_hole_optimization) {
1062                 hole_entry = zalloc(vm_map_holes_zone);
1063
1064                 hole_entry->start = min;
1065 #if defined(__arm__) || defined(__arm64__)
1066                 hole_entry->end = result->max_offset;
1067 #else
1068                 hole_entry->end = (max > (vm_map_offset_t)MACH_VM_MAX_ADDRESS) ? max : (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
1069 #endif
1070                 result->holes_list = result->hole_hint = hole_entry;
1071                 hole_entry->prev = hole_entry->next = (vm_map_entry_t) hole_entry;
1072                 result->holelistenabled = TRUE;
1073
1074         } else {
1075
1076                 result->holelistenabled = FALSE;
1077         }
1078
1079         vm_map_lock_init(result);
1080         lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
1081
1082         return(result);
1083 }
1084
/*
 *      vm_map_entry_create / vm_map_copy_entry_create:  [ internal use only ]
 *
 *      Allocate a VM map entry for insertion in the given map or map
 *      copy.  Both forward to _vm_map_entry_create(), passing the entry
 *      header of the map (hdr) or of the copy (cpy_hdr).
 */
#define vm_map_entry_create(map, map_locked)                            \
        _vm_map_entry_create(&(map)->hdr, map_locked)

#define vm_map_copy_entry_create(copy, map_locked)                      \
        _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)

/*
 * Running counts of entries handed out for non-pageable headers, split
 * by the zone that supplied them.
 */
unsigned reserved_zalloc_count;
unsigned nonreserved_zalloc_count;
1096
1097 static vm_map_entry_t
1098 _vm_map_entry_create(
1099         struct vm_map_header    *map_header, boolean_t __unused map_locked)
1100 {
1101         zone_t  zone;
1102         vm_map_entry_t  entry;
1103
1104         zone = vm_map_entry_zone;
1105
1106         assert(map_header->entries_pageable ? !map_locked : TRUE);
1107
1108         if (map_header->entries_pageable) {
1109                 entry = (vm_map_entry_t) zalloc(zone);
1110         }
1111         else {
1112                 entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);
1113
1114                 if (entry == VM_MAP_ENTRY_NULL) {
1115                         zone = vm_map_entry_reserved_zone;
1116                         entry = (vm_map_entry_t) zalloc(zone);
1117                         OSAddAtomic(1, &reserved_zalloc_count);
1118                 } else
1119                         OSAddAtomic(1, &nonreserved_zalloc_count);
1120         }
1121
1122         if (entry == VM_MAP_ENTRY_NULL)
1123                 panic("vm_map_entry_create");
1124         entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
1125
1126         vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
1127 #if     MAP_ENTRY_CREATION_DEBUG
1128         entry->vme_creation_maphdr = map_header;
1129         backtrace(&entry->vme_creation_bt[0],
1130                   (sizeof(entry->vme_creation_bt)/sizeof(uintptr_t)));
1131 #endif
1132         return(entry);
1133 }
1134
/*
 *      vm_map_entry_dispose:   [ internal use only ]
 *
 *      Inverse of vm_map_entry_create.
 *
 *      write map lock held so no need to
 *      do anything special to ensure correctness
 *      of the stores
 */
#define vm_map_entry_dispose(map, entry)                        \
        _vm_map_entry_dispose(&(map)->hdr, (entry))

/*
 *      vm_map_copy_entry_dispose:      [ internal use only ]
 *
 *      Inverse of vm_map_copy_entry_create: disposes of an entry that
 *      belongs to the given map copy's header (cpy_hdr).  The first
 *      parameter is named "copy" so the expansion uses the macro
 *      argument rather than whatever "copy" is in the caller's scope.
 */
#define vm_map_copy_entry_dispose(copy, entry) \
        _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
1149
1150 static void
1151 _vm_map_entry_dispose(
1152         struct vm_map_header    *map_header,
1153         vm_map_entry_t          entry)
1154 {
1155         zone_t          zone;
1156
1157         if (map_header->entries_pageable || !(entry->from_reserved_zone))
1158                 zone = vm_map_entry_zone;
1159         else
1160                 zone = vm_map_entry_reserved_zone;
1161
1162         if (!map_header->entries_pageable) {
1163                 if (zone == vm_map_entry_zone)
1164                         OSAddAtomic(-1, &nonreserved_zalloc_count);
1165                 else
1166                         OSAddAtomic(-1, &reserved_zalloc_count);
1167         }
1168
1169         zfree(zone, entry);
1170 }
1171
#if MACH_ASSERT
/* When FALSE, first_free_is_valid() reports TRUE without checking. */
static boolean_t first_free_check = FALSE;

/*
 *      first_free_is_valid:
 *
 *      Returns the result of first_free_is_valid_store() for the map
 *      when first_free_check is set, and TRUE otherwise.
 */
boolean_t
first_free_is_valid(
        vm_map_t        map)
{
        return first_free_check ? first_free_is_valid_store(map) : TRUE;
}
#endif /* MACH_ASSERT */
1184
1185
/*
 * Link an entry into a map copy's entry list after "after_where";
 * forwards to _vm_map_store_entry_link() on the copy's cpy_hdr.
 */
#define vm_map_copy_entry_link(copy, after_where, entry)        \
        _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))

/*
 * Unlink an entry from a map copy's entry list; forwards to
 * _vm_map_store_entry_unlink() on the copy's cpy_hdr.
 */
#define vm_map_copy_entry_unlink(copy, entry)                   \
        _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
1191
1192 #if     MACH_ASSERT && TASK_SWAPPER
1193 /*
1194  *      vm_map_res_reference:
1195  *
1196  *      Adds another valid residence count to the given map.
1197  *
1198  *      Map is locked so this function can be called from
1199  *      vm_map_swapin.
1200  *
1201  */
1202 void vm_map_res_reference(vm_map_t map)
1203 {
1204         /* assert map is locked */
1205         assert(map->res_count >= 0);
1206         assert(map->ref_count >= map->res_count);
1207         if (map->res_count == 0) {
1208                 lck_mtx_unlock(&map->s_lock);
1209                 vm_map_lock(map);
1210                 vm_map_swapin(map);
1211                 lck_mtx_lock(&map->s_lock);
1212                 ++map->res_count;
1213                 vm_map_unlock(map);
1214         } else
1215                 ++map->res_count;
1216 }
1217
1218 /*
1219  *      vm_map_reference_swap:
1220  *
1221  *      Adds valid reference and residence counts to the given map.
1222  *
1223  *      The map may not be in memory (i.e. zero residence count).
1224  *
1225  */
1226 void vm_map_reference_swap(vm_map_t map)
1227 {
1228         assert(map != VM_MAP_NULL);
1229         lck_mtx_lock(&map->s_lock);
1230         assert(map->res_count >= 0);
1231         assert(map->ref_count >= map->res_count);
1232         map->ref_count++;
1233         vm_map_res_reference(map);
1234         lck_mtx_unlock(&map->s_lock);
1235 }
1236
1237 /*
1238  *      vm_map_res_deallocate:
1239  *
1240  *      Decrement residence count on a map; possibly causing swapout.
1241  *
1242  *      The map must be in memory (i.e. non-zero residence count).
1243  *
1244  *      The map is locked, so this function is callable from vm_map_deallocate.
1245  *
1246  */
1247 void vm_map_res_deallocate(vm_map_t map)
1248 {
1249         assert(map->res_count > 0);
1250         if (--map->res_count == 0) {
1251                 lck_mtx_unlock(&map->s_lock);
1252                 vm_map_lock(map);
1253                 vm_map_swapout(map);
1254                 vm_map_unlock(map);
1255                 lck_mtx_lock(&map->s_lock);
1256         }
1257         assert(map->ref_count >= map->res_count);
1258 }
1259 #endif  /* MACH_ASSERT && TASK_SWAPPER */
1260
1261 /*
1262  *      vm_map_destroy:
1263  *
1264  *      Actually destroy a map.
1265  */
1266 void
1267 vm_map_destroy(
1268         vm_map_t        map,
1269         int             flags)
1270 {
1271         vm_map_lock(map);
1272
1273         /* final cleanup: no need to unnest shared region */
1274         flags |= VM_MAP_REMOVE_NO_UNNESTING;
1275         /* final cleanup: ok to remove immutable mappings */
1276         flags |= VM_MAP_REMOVE_IMMUTABLE;
1277
1278         /* clean up regular map entries */
1279         (void) vm_map_delete(map, map->min_offset, map->max_offset,
1280                              flags, VM_MAP_NULL);
1281         /* clean up leftover special mappings (commpage, etc...) */
1282 #if     !defined(__arm__) && !defined(__arm64__)
1283         (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
1284                              flags, VM_MAP_NULL);
1285 #endif /* !__arm__ && !__arm64__ */
1286
1287         vm_map_disable_hole_optimization(map);
1288         vm_map_unlock(map);
1289
1290         assert(map->hdr.nentries == 0);
1291
1292         if(map->pmap)
1293                 pmap_destroy(map->pmap);
1294
1295         if (vm_map_lck_attr.lck_attr_val & LCK_ATTR_DEBUG) {
1296                 /*
1297                  * If lock debugging is enabled the mutexes get tagged as LCK_MTX_TAG_INDIRECT.
1298                  * And this is regardless of whether the lck_mtx_ext_t is embedded in the
1299                  * structure or kalloc'ed via lck_mtx_init.
1300                  * An example is s_lock_ext within struct _vm_map.
1301                  *
1302                  * A lck_mtx_destroy on such a mutex will attempt a kfree and panic. We
1303                  * can add another tag to detect embedded vs alloc'ed indirect external
1304                  * mutexes but that'll be additional checks in the lock path and require
1305                  * updating dependencies for the old vs new tag.
1306                  *
1307                  * Since the kfree() is for LCK_MTX_TAG_INDIRECT mutexes and that tag is applied
1308                  * just when lock debugging is ON, we choose to forego explicitly destroying
1309                  * the vm_map mutex and rw lock and, as a consequence, will overflow the reference
1310                  * count on vm_map_lck_grp, which has no serious side-effect.
1311                  */
1312         } else {
1313                 lck_rw_destroy(&(map)->lock, &vm_map_lck_grp);
1314                 lck_mtx_destroy(&(map)->s_lock, &vm_map_lck_grp);
1315         }
1316
1317         zfree(vm_map_zone, map);
1318 }
1319
1320 /*
1321  * Returns pid of the task with the largest number of VM map entries.
1322  * Used in the zone-map-exhaustion jetsam path.
1323  */
1324 pid_t
1325 find_largest_process_vm_map_entries(void)
1326 {
1327         pid_t victim_pid = -1;
1328         int max_vm_map_entries = 0;
1329         task_t task = TASK_NULL;
1330         queue_head_t *task_list = &tasks;
1331
1332         lck_mtx_lock(&tasks_threads_lock);
1333         queue_iterate(task_list, task, task_t, tasks) {
1334                 if (task == kernel_task || !task->active)
1335                         continue;
1336
1337                 vm_map_t task_map = task->map;
1338                 if (task_map != VM_MAP_NULL) {
1339                         int task_vm_map_entries = task_map->hdr.nentries;
1340                         if (task_vm_map_entries > max_vm_map_entries) {
1341                                 max_vm_map_entries = task_vm_map_entries;
1342                                 victim_pid = pid_from_task(task);
1343                         }
1344                 }
1345         }
1346         lck_mtx_unlock(&tasks_threads_lock);
1347
1348         printf("zone_map_exhaustion: victim pid %d, vm region count: %d\n", victim_pid, max_vm_map_entries);
1349         return victim_pid;
1350 }
1351
1352 #if     TASK_SWAPPER
1353 /*
1354  * vm_map_swapin/vm_map_swapout
1355  *
1356  * Swap a map in and out, either referencing or releasing its resources.
1357  * These functions are internal use only; however, they must be exported
1358  * because they may be called from macros, which are exported.
1359  *
1360  * In the case of swapout, there could be races on the residence count,
1361  * so if the residence count is up, we return, assuming that a
1362  * vm_map_deallocate() call in the near future will bring us back.
1363  *
1364  * Locking:
1365  *      -- We use the map write lock for synchronization among races.
1366  *      -- The map write lock, and not the simple s_lock, protects the
1367  *         swap state of the map.
1368  *      -- If a map entry is a share map, then we hold both locks, in
1369  *         hierarchical order.
1370  *
1371  * Synchronization Notes:
1372  *      1) If a vm_map_swapin() call happens while swapout in progress, it
1373  *      will block on the map lock and proceed when swapout is through.
1374  *      2) A vm_map_reference() call at this time is illegal, and will
1375  *      cause a panic.  vm_map_reference() is only allowed on resident
1376  *      maps, since it refuses to block.
1377  *      3) A vm_map_swapin() call during a swapin will block, and
1378  *      proceed when the first swapin is done, turning into a nop.
1379  *      This is the reason the res_count is not incremented until
1380  *      after the swapin is complete.
1381  *      4) There is a timing hole after the checks of the res_count, before
1382  *      the map lock is taken, during which a swapin may get the lock
1383  *      before a swapout about to happen.  If this happens, the swapin
1384  *      will detect the state and increment the reference count, causing
1385  *      the swapout to be a nop, thereby delaying it until a later
1386  *      vm_map_deallocate.  If the swapout gets the lock first, then
1387  *      the swapin will simply block until the swapout is done, and
1388  *      then proceed.
1389  *
1390  * Because vm_map_swapin() is potentially an expensive operation, it
1391  * should be used with caution.
1392  *
1393  * Invariants:
1394  *      1) A map with a residence count of zero is either swapped, or
1395  *         being swapped.
1396  *      2) A map with a non-zero residence count is either resident,
1397  *         or being swapped in.
1398  */
1399
1400 int vm_map_swap_enable = 1;
1401
1402 void vm_map_swapin (vm_map_t map)
1403 {
1404         vm_map_entry_t entry;
1405
1406         if (!vm_map_swap_enable)        /* debug */
1407                 return;
1408
1409         /*
1410          * Map is locked
1411          * First deal with various races.
1412          */
1413         if (map->sw_state == MAP_SW_IN)
1414                 /*
1415                  * we raced with swapout and won.  Returning will incr.
1416                  * the res_count, turning the swapout into a nop.
1417                  */
1418                 return;
1419
1420         /*
1421          * The residence count must be zero.  If we raced with another
1422          * swapin, the state would have been IN; if we raced with a
1423          * swapout (after another competing swapin), we must have lost
1424          * the race to get here (see above comment), in which case
1425          * res_count is still 0.
1426          */
1427         assert(map->res_count == 0);
1428
1429         /*
1430          * There are no intermediate states of a map going out or
1431          * coming in, since the map is locked during the transition.
1432          */
1433         assert(map->sw_state == MAP_SW_OUT);
1434
1435         /*
1436          * We now operate upon each map entry.  If the entry is a sub-
1437          * or share-map, we call vm_map_res_reference upon it.
1438          * If the entry is an object, we call vm_object_res_reference
1439          * (this may iterate through the shadow chain).
1440          * Note that we hold the map locked the entire time,
1441          * even if we get back here via a recursive call in
1442          * vm_map_res_reference.
1443          */
1444         entry = vm_map_first_entry(map);
1445
1446         while (entry != vm_map_to_entry(map)) {
1447                 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1448                         if (entry->is_sub_map) {
1449                                 vm_map_t lmap = VME_SUBMAP(entry);
1450                                 lck_mtx_lock(&lmap->s_lock);
1451                                 vm_map_res_reference(lmap);
1452                                 lck_mtx_unlock(&lmap->s_lock);
1453                         } else {
1454                                 vm_object_t object = VME_OBEJCT(entry);
1455                                 vm_object_lock(object);
1456                                 /*
1457                                  * This call may iterate through the
1458                                  * shadow chain.
1459                                  */
1460                                 vm_object_res_reference(object);
1461                                 vm_object_unlock(object);
1462                         }
1463                 }
1464                 entry = entry->vme_next;
1465         }
1466         assert(map->sw_state == MAP_SW_OUT);
1467         map->sw_state = MAP_SW_IN;
1468 }
1469
1470 void vm_map_swapout(vm_map_t map)
1471 {
1472         vm_map_entry_t entry;
1473
1474         /*
1475          * Map is locked
1476          * First deal with various races.
1477          * If we raced with a swapin and lost, the residence count
1478          * will have been incremented to 1, and we simply return.
1479          */
1480         lck_mtx_lock(&map->s_lock);
1481         if (map->res_count != 0) {
1482                 lck_mtx_unlock(&map->s_lock);
1483                 return;
1484         }
1485         lck_mtx_unlock(&map->s_lock);
1486
1487         /*
1488          * There are no intermediate states of a map going out or
1489          * coming in, since the map is locked during the transition.
1490          */
1491         assert(map->sw_state == MAP_SW_IN);
1492
1493         if (!vm_map_swap_enable)
1494                 return;
1495
1496         /*
1497          * We now operate upon each map entry.  If the entry is a sub-
1498          * or share-map, we call vm_map_res_deallocate upon it.
1499          * If the entry is an object, we call vm_object_res_deallocate
1500          * (this may iterate through the shadow chain).
1501          * Note that we hold the map locked the entire time,
1502          * even if we get back here via a recursive call in
1503          * vm_map_res_deallocate.
1504          */
1505         entry = vm_map_first_entry(map);
1506
1507         while (entry != vm_map_to_entry(map)) {
1508                 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1509                         if (entry->is_sub_map) {
1510                                 vm_map_t lmap = VME_SUBMAP(entry);
1511                                 lck_mtx_lock(&lmap->s_lock);
1512                                 vm_map_res_deallocate(lmap);
1513                                 lck_mtx_unlock(&lmap->s_lock);
1514                         } else {
1515                                 vm_object_t object = VME_OBJECT(entry);
1516                                 vm_object_lock(object);
1517                                 /*
1518                                  * This call may take a long time,
1519                                  * since it could actively push
1520                                  * out pages (if we implement it
1521                                  * that way).
1522                                  */
1523                                 vm_object_res_deallocate(object);
1524                                 vm_object_unlock(object);
1525                         }
1526                 }
1527                 entry = entry->vme_next;
1528         }
1529         assert(map->sw_state == MAP_SW_IN);
1530         map->sw_state = MAP_SW_OUT;
1531 }
1532
1533 #endif  /* TASK_SWAPPER */
1534
1535 /*
1536  *      vm_map_lookup_entry:    [ internal use only ]
1537  *
1538  *      Calls into the vm map store layer to find the map
1539  *      entry containing (or immediately preceding) the
1540  *      specified address in the given map; the entry is returned
1541  *      in the "entry" parameter.  The boolean
1542  *      result indicates whether the address is
1543  *      actually contained in the map.
1544  */
1545 boolean_t
1546 vm_map_lookup_entry(
1547         vm_map_t                map,
1548         vm_map_offset_t address,
1549         vm_map_entry_t          *entry)         /* OUT */
1550 {
1551         return ( vm_map_store_lookup_entry( map, address, entry ));
1552 }
1553
1554 /*
1555  *      Routine:        vm_map_find_space
1556  *      Purpose:
1557  *              Allocate a range in the specified virtual address map,
1558  *              returning the entry allocated for that range.
1559  *              Used by kmem_alloc, etc.
1560  *
1561  *              The map must be NOT be locked. It will be returned locked
1562  *              on KERN_SUCCESS, unlocked on failure.
1563  *
1564  *              If an entry is allocated, the object/offset fields
1565  *              are initialized to zero.
1566  */
1567 kern_return_t
1568 vm_map_find_space(
1569         vm_map_t        map,
1570         vm_map_offset_t         *address,       /* OUT */
1571         vm_map_size_t           size,
1572         vm_map_offset_t         mask,
1573         int                     flags __unused,
1574         vm_map_kernel_flags_t   vmk_flags,
1575         vm_tag_t                tag,
1576         vm_map_entry_t          *o_entry)       /* OUT */
1577 {
1578         vm_map_entry_t                  entry, new_entry;
1579         vm_map_offset_t start;
1580         vm_map_offset_t end;
1581         vm_map_entry_t                  hole_entry;
1582
1583         if (size == 0) {
1584                 *address = 0;
1585                 return KERN_INVALID_ARGUMENT;
1586         }
1587
1588         if (vmk_flags.vmkf_guard_after) {
1589                 /* account for the back guard page in the size */
1590                 size += VM_MAP_PAGE_SIZE(map);
1591         }
1592
1593         new_entry = vm_map_entry_create(map, FALSE);
1594
1595         /*
1596          *      Look for the first possible address; if there's already
1597          *      something at this address, we have to start after it.
1598          */
1599
1600         vm_map_lock(map);
1601
1602         if( map->disable_vmentry_reuse == TRUE) {
1603                 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1604         } else {
1605                 if (map->holelistenabled) {
1606                         hole_entry = (vm_map_entry_t)map->holes_list;
1607
1608                         if (hole_entry == NULL) {
1609                                 /*
1610                                  * No more space in the map?
1611                                  */
1612                                 vm_map_entry_dispose(map, new_entry);
1613                                 vm_map_unlock(map);
1614                                 return(KERN_NO_SPACE);
1615                         }
1616
1617                         entry = hole_entry;
1618                         start = entry->vme_start;
1619                 } else {
1620                         assert(first_free_is_valid(map));
1621                         if ((entry = map->first_free) == vm_map_to_entry(map))
1622                                 start = map->min_offset;
1623                         else
1624                                 start = entry->vme_end;
1625                 }
1626         }
1627
1628         /*
1629          *      In any case, the "entry" always precedes
1630          *      the proposed new region throughout the loop:
1631          */
1632
1633         while (TRUE) {
1634                 vm_map_entry_t  next;
1635
1636                 /*
1637                  *      Find the end of the proposed new region.
1638                  *      Be sure we didn't go beyond the end, or
1639                  *      wrap around the address.
1640                  */
1641
1642                 if (vmk_flags.vmkf_guard_before) {
1643                         /* reserve space for the front guard page */
1644                         start += VM_MAP_PAGE_SIZE(map);
1645                 }
1646                 end = ((start + mask) & ~mask);
1647
1648                 if (end < start) {
1649                         vm_map_entry_dispose(map, new_entry);
1650                         vm_map_unlock(map);
1651                         return(KERN_NO_SPACE);
1652                 }
1653                 start = end;
1654                 end += size;
1655
1656                 if ((end > map->max_offset) || (end < start)) {
1657                         vm_map_entry_dispose(map, new_entry);
1658                         vm_map_unlock(map);
1659                         return(KERN_NO_SPACE);
1660                 }
1661
1662                 next = entry->vme_next;
1663
1664                 if (map->holelistenabled) {
1665                         if (entry->vme_end >= end)
1666                                 break;
1667                 } else {
1668                         /*
1669                          *      If there are no more entries, we must win.
1670                          *
1671                          *      OR
1672                          *
1673                          *      If there is another entry, it must be
1674                          *      after the end of the potential new region.
1675                          */
1676
1677                         if (next == vm_map_to_entry(map))
1678                                 break;
1679
1680                         if (next->vme_start >= end)
1681                                 break;
1682                 }
1683
1684                 /*
1685                  *      Didn't fit -- move to the next entry.
1686                  */
1687
1688                 entry = next;
1689
1690                 if (map->holelistenabled) {
1691                         if (entry == (vm_map_entry_t) map->holes_list) {
1692                                 /*
1693                                  * Wrapped around
1694                                  */
1695                                 vm_map_entry_dispose(map, new_entry);
1696                                 vm_map_unlock(map);
1697                                 return(KERN_NO_SPACE);
1698                         }
1699                         start = entry->vme_start;
1700                 } else {
1701                         start = entry->vme_end;
1702                 }
1703         }
1704
1705         if (map->holelistenabled) {
1706                 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
1707                         panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
1708                 }
1709         }
1710
1711         /*
1712          *      At this point,
1713          *              "start" and "end" should define the endpoints of the
1714          *                      available new range, and
1715          *              "entry" should refer to the region before the new
1716          *                      range, and
1717          *
1718          *              the map should be locked.
1719          */
1720
1721         if (vmk_flags.vmkf_guard_before) {
1722                 /* go back for the front guard page */
1723                 start -= VM_MAP_PAGE_SIZE(map);
1724         }
1725         *address = start;
1726
1727         assert(start < end);
1728         new_entry->vme_start = start;
1729         new_entry->vme_end = end;
1730         assert(page_aligned(new_entry->vme_start));
1731         assert(page_aligned(new_entry->vme_end));
1732         assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
1733                                    VM_MAP_PAGE_MASK(map)));
1734         assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
1735                                    VM_MAP_PAGE_MASK(map)));
1736
1737         new_entry->is_shared = FALSE;
1738         new_entry->is_sub_map = FALSE;
1739         new_entry->use_pmap = TRUE;
1740         VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
1741         VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);
1742
1743         new_entry->needs_copy = FALSE;
1744
1745         new_entry->inheritance = VM_INHERIT_DEFAULT;
1746         new_entry->protection = VM_PROT_DEFAULT;
1747         new_entry->max_protection = VM_PROT_ALL;
1748         new_entry->behavior = VM_BEHAVIOR_DEFAULT;
1749         new_entry->wired_count = 0;
1750         new_entry->user_wired_count = 0;
1751
1752         new_entry->in_transition = FALSE;
1753         new_entry->needs_wakeup = FALSE;
1754         new_entry->no_cache = FALSE;
1755         new_entry->permanent = FALSE;
1756         new_entry->superpage_size = FALSE;
1757         if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
1758                 new_entry->map_aligned = TRUE;
1759         } else {
1760                 new_entry->map_aligned = FALSE;
1761         }
1762
1763         new_entry->used_for_jit = FALSE;
1764         new_entry->zero_wired_pages = FALSE;
1765         new_entry->iokit_acct = FALSE;
1766         new_entry->vme_resilient_codesign = FALSE;
1767         new_entry->vme_resilient_media = FALSE;
1768         if (vmk_flags.vmkf_atomic_entry)
1769                 new_entry->vme_atomic = TRUE;
1770         else
1771                 new_entry->vme_atomic = FALSE;
1772
1773         VME_ALIAS_SET(new_entry, tag);
1774
1775         /*
1776          *      Insert the new entry into the list
1777          */
1778
1779         vm_map_store_entry_link(map, entry, new_entry);
1780
1781         map->size += size;
1782
1783         /*
1784          *      Update the lookup hint
1785          */
1786         SAVE_HINT_MAP_WRITE(map, new_entry);
1787
1788         *o_entry = new_entry;
1789         return(KERN_SUCCESS);
1790 }
1791
1792 int vm_map_pmap_enter_print = FALSE;
1793 int vm_map_pmap_enter_enable = FALSE;
1794
1795 /*
1796  *      Routine:        vm_map_pmap_enter [internal only]
1797  *
1798  *      Description:
1799  *              Force pages from the specified object to be entered into
1800  *              the pmap at the specified address if they are present.
1801  *              As soon as a page not found in the object the scan ends.
1802  *
1803  *      Returns:
1804  *              Nothing.
1805  *
1806  *      In/out conditions:
1807  *              The source map should not be locked on entry.
1808  */
1809 __unused static void
1810 vm_map_pmap_enter(
1811         vm_map_t                map,
1812         vm_map_offset_t         addr,
1813         vm_map_offset_t         end_addr,
1814         vm_object_t             object,
1815         vm_object_offset_t      offset,
1816         vm_prot_t               protection)
1817 {
1818         int                     type_of_fault;
1819         kern_return_t           kr;
1820
1821         if(map->pmap == 0)
1822                 return;
1823
1824         while (addr < end_addr) {
1825                 vm_page_t       m;
1826
1827
1828                 /*
1829                  * TODO:
1830                  * From vm_map_enter(), we come into this function without the map
1831                  * lock held or the object lock held.
1832                  * We haven't taken a reference on the object either.
1833                  * We should do a proper lookup on the map to make sure
1834                  * that things are sane before we go locking objects that
1835                  * could have been deallocated from under us.
1836                  */
1837
1838                 vm_object_lock(object);
1839
1840                 m = vm_page_lookup(object, offset);
1841
1842                 if (m == VM_PAGE_NULL || m->busy || m->fictitious ||
1843                     (m->unusual && ( m->error || m->restart || m->absent))) {
1844                         vm_object_unlock(object);
1845                         return;
1846                 }
1847
1848                 if (vm_map_pmap_enter_print) {
1849                         printf("vm_map_pmap_enter:");
1850                         printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
1851                                map, (unsigned long long)addr, object, (unsigned long long)offset);
1852                 }
1853                 type_of_fault = DBG_CACHE_HIT_FAULT;
1854                 kr = vm_fault_enter(m, map->pmap, addr, protection, protection,
1855                                                     VM_PAGE_WIRED(m),
1856                                                     FALSE, /* change_wiring */
1857                                                     VM_KERN_MEMORY_NONE, /* tag - not wiring */
1858                                                     FALSE, /* no_cache */
1859                                                     FALSE, /* cs_bypass */
1860                                                     0,     /* XXX need user tag / alias? */
1861                                                     0,     /* pmap_options */
1862                                                     NULL,  /* need_retry */
1863                                                     &type_of_fault);
1864
1865                 vm_object_unlock(object);
1866
1867                 offset += PAGE_SIZE_64;
1868                 addr += PAGE_SIZE;
1869         }
1870 }
1871
1872 boolean_t vm_map_pmap_is_empty(
1873         vm_map_t        map,
1874         vm_map_offset_t start,
1875         vm_map_offset_t end);
1876 boolean_t vm_map_pmap_is_empty(
1877         vm_map_t        map,
1878         vm_map_offset_t start,
1879         vm_map_offset_t end)
1880 {
1881 #ifdef MACHINE_PMAP_IS_EMPTY
1882         return pmap_is_empty(map->pmap, start, end);
1883 #else   /* MACHINE_PMAP_IS_EMPTY */
1884         vm_map_offset_t offset;
1885         ppnum_t         phys_page;
1886
1887         if (map->pmap == NULL) {
1888                 return TRUE;
1889         }
1890
1891         for (offset = start;
1892              offset < end;
1893              offset += PAGE_SIZE) {
1894                 phys_page = pmap_find_phys(map->pmap, offset);
1895                 if (phys_page) {
1896                         kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
1897                                 "page %d at 0x%llx\n",
1898                                 map, (long long)start, (long long)end,
1899                                 phys_page, (long long)offset);
1900                         return FALSE;
1901                 }
1902         }
1903         return TRUE;
1904 #endif  /* MACHINE_PMAP_IS_EMPTY */
1905 }
1906
1907 #define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
1908 kern_return_t
1909 vm_map_random_address_for_size(
1910         vm_map_t        map,
1911         vm_map_offset_t *address,
1912         vm_map_size_t   size)
1913 {
1914         kern_return_t   kr = KERN_SUCCESS;
1915         int             tries = 0;
1916         vm_map_offset_t random_addr = 0;
1917         vm_map_offset_t hole_end;
1918
1919         vm_map_entry_t  next_entry = VM_MAP_ENTRY_NULL;
1920         vm_map_entry_t  prev_entry = VM_MAP_ENTRY_NULL;
1921         vm_map_size_t   vm_hole_size = 0;
1922         vm_map_size_t   addr_space_size;
1923
1924         addr_space_size = vm_map_max(map) - vm_map_min(map);
1925
1926         assert(page_aligned(size));
1927
1928         while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1929                 random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
1930                 random_addr = vm_map_trunc_page(
1931                         vm_map_min(map) +(random_addr % addr_space_size),
1932                         VM_MAP_PAGE_MASK(map));
1933
1934                 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
1935                         if (prev_entry == vm_map_to_entry(map)) {
1936                                 next_entry = vm_map_first_entry(map);
1937                         } else {
1938                                 next_entry = prev_entry->vme_next;
1939                         }
1940                         if (next_entry == vm_map_to_entry(map)) {
1941                                 hole_end = vm_map_max(map);
1942                         } else {
1943                                 hole_end = next_entry->vme_start;
1944                         }
1945                         vm_hole_size = hole_end - random_addr;
1946                         if (vm_hole_size >= size) {
1947                                 *address = random_addr;
1948                                 break;
1949                         }
1950                 }
1951                 tries++;
1952         }
1953
1954         if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
1955                 kr = KERN_NO_SPACE;
1956         }
1957         return kr;
1958 }
1959
1960 /*
1961  *      Routine:        vm_map_enter
1962  *
1963  *      Description:
1964  *              Allocate a range in the specified virtual address map.
1965  *              The resulting range will refer to memory defined by
1966  *              the given memory object and offset into that object.
1967  *
1968  *              Arguments are as defined in the vm_map call.
1969  */
1970 int _map_enter_debug = 0;
1971 static unsigned int vm_map_enter_restore_successes = 0;
1972 static unsigned int vm_map_enter_restore_failures = 0;
1973 kern_return_t
1974 vm_map_enter(
1975         vm_map_t                map,
1976         vm_map_offset_t         *address,       /* IN/OUT */
1977         vm_map_size_t           size,
1978         vm_map_offset_t         mask,
1979         int                     flags,
1980         vm_map_kernel_flags_t   vmk_flags,
1981         vm_tag_t                alias,
1982         vm_object_t             object,
1983         vm_object_offset_t      offset,
1984         boolean_t               needs_copy,
1985         vm_prot_t               cur_protection,
1986         vm_prot_t               max_protection,
1987         vm_inherit_t            inheritance)
1988 {
1989         vm_map_entry_t          entry, new_entry;
1990         vm_map_offset_t         start, tmp_start, tmp_offset;
1991         vm_map_offset_t         end, tmp_end;
1992         vm_map_offset_t         tmp2_start, tmp2_end;
1993         vm_map_offset_t         step;
1994         kern_return_t           result = KERN_SUCCESS;
1995         vm_map_t                zap_old_map = VM_MAP_NULL;
1996         vm_map_t                zap_new_map = VM_MAP_NULL;
1997         boolean_t               map_locked = FALSE;
1998         boolean_t               pmap_empty = TRUE;
1999         boolean_t               new_mapping_established = FALSE;
2000         boolean_t               keep_map_locked = vmk_flags.vmkf_keep_map_locked;
2001         boolean_t               anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
2002         boolean_t               purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
2003         boolean_t               overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
2004         boolean_t               no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
2005         boolean_t               is_submap = vmk_flags.vmkf_submap;
2006         boolean_t               permanent = vmk_flags.vmkf_permanent;
2007         boolean_t               entry_for_jit = vmk_flags.vmkf_map_jit;
2008         boolean_t               iokit_acct = vmk_flags.vmkf_iokit_acct;
2009         boolean_t               resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
2010         boolean_t               resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
2011         boolean_t               random_address = ((flags & VM_FLAGS_RANDOM_ADDR) != 0);
2012         unsigned int            superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
2013         vm_tag_t                user_alias;
2014         vm_map_offset_t         effective_min_offset, effective_max_offset;
2015         kern_return_t           kr;
2016         boolean_t               clear_map_aligned = FALSE;
2017         vm_map_entry_t          hole_entry;
2018
2019         assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
2020
2021         if (superpage_size) {
2022                 switch (superpage_size) {
2023                         /*
2024                          * Note that the current implementation only supports
2025                          * a single size for superpages, SUPERPAGE_SIZE, per
2026                          * architecture. As soon as more sizes are supposed
2027                          * to be supported, SUPERPAGE_SIZE has to be replaced
2028                          * with a lookup of the size depending on superpage_size.
2029                          */
2030 #ifdef __x86_64__
2031                         case SUPERPAGE_SIZE_ANY:
2032                                 /* handle it like 2 MB and round up to page size */
2033                                 size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
2034                         case SUPERPAGE_SIZE_2MB:
2035                                 break;
2036 #endif
2037                         default:
2038                                 return KERN_INVALID_ARGUMENT;
2039                 }
2040                 mask = SUPERPAGE_SIZE-1;
2041                 if (size & (SUPERPAGE_SIZE-1))
2042                         return KERN_INVALID_ARGUMENT;
2043                 inheritance = VM_INHERIT_NONE;  /* fork() children won't inherit superpages */
2044         }
2045
2046
2047 #if CONFIG_EMBEDDED
2048         if (cur_protection & VM_PROT_WRITE){
2049                 if ((cur_protection & VM_PROT_EXECUTE) && !entry_for_jit){
2050                         printf("EMBEDDED: %s: curprot cannot be write+execute. "
2051                                "turning off execute\n",
2052                                __FUNCTION__);
2053                         cur_protection &= ~VM_PROT_EXECUTE;
2054                 }
2055         }
2056 #endif /* CONFIG_EMBEDDED */
2057
2058         if (resilient_codesign || resilient_media) {
2059                 if ((cur_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ||
2060                     (max_protection & (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2061                         return KERN_PROTECTION_FAILURE;
2062                 }
2063         }
2064
2065         if (is_submap) {
2066                 if (purgable) {
2067                         /* submaps can not be purgeable */
2068                         return KERN_INVALID_ARGUMENT;
2069                 }
2070                 if (object == VM_OBJECT_NULL) {
2071                         /* submaps can not be created lazily */
2072                         return KERN_INVALID_ARGUMENT;
2073                 }
2074         }
2075         if (vmk_flags.vmkf_already) {
2076                 /*
2077                  * VM_FLAGS_ALREADY says that it's OK if the same mapping
2078                  * is already present.  For it to be meaningful, the requested
2079                  * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
2080                  * we shouldn't try and remove what was mapped there first
2081                  * (!VM_FLAGS_OVERWRITE).
2082                  */
2083                 if ((flags & VM_FLAGS_ANYWHERE) ||
2084                     (flags & VM_FLAGS_OVERWRITE)) {
2085                         return KERN_INVALID_ARGUMENT;
2086                 }
2087         }
2088
2089         effective_min_offset = map->min_offset;
2090
2091         if (vmk_flags.vmkf_beyond_max) {
2092                 /*
2093                  * Allow an insertion beyond the map's max offset.
2094                  */
2095 #if     !defined(__arm__) && !defined(__arm64__)
2096                 if (vm_map_is_64bit(map))
2097                         effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
2098                 else
2099 #endif  /* __arm__ */
2100                         effective_max_offset = 0x00000000FFFFF000ULL;
2101         } else {
2102                 effective_max_offset = map->max_offset;
2103         }
2104
2105         if (size == 0 ||
2106             (offset & PAGE_MASK_64) != 0) {
2107                 *address = 0;
2108                 return KERN_INVALID_ARGUMENT;
2109         }
2110
2111         if (map->pmap == kernel_pmap) {
2112                 user_alias = VM_KERN_MEMORY_NONE;
2113         } else {
2114                 user_alias = alias;
2115         }
2116
2117 #define RETURN(value)   { result = value; goto BailOut; }
2118
2119         assert(page_aligned(*address));
2120         assert(page_aligned(size));
2121
2122         if (!VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
2123                 /*
2124                  * In most cases, the caller rounds the size up to the
2125                  * map's page size.
2126                  * If we get a size that is explicitly not map-aligned here,
2127                  * we'll have to respect the caller's wish and mark the
2128                  * mapping as "not map-aligned" to avoid tripping the
2129                  * map alignment checks later.
2130                  */
2131                 clear_map_aligned = TRUE;
2132         }
2133         if (!anywhere &&
2134             !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
2135                 /*
2136                  * We've been asked to map at a fixed address and that
2137                  * address is not aligned to the map's specific alignment.
2138                  * The caller should know what it's doing (i.e. most likely
2139                  * mapping some fragmented copy map, transferring memory from
2140                  * a VM map with a different alignment), so clear map_aligned
2141                  * for this new VM map entry and proceed.
2142                  */
2143                 clear_map_aligned = TRUE;
2144         }
2145
2146         /*
2147          * Only zero-fill objects are allowed to be purgable.
2148          * LP64todo - limit purgable objects to 32-bits for now
2149          */
2150         if (purgable &&
2151             (offset != 0 ||
2152              (object != VM_OBJECT_NULL &&
2153               (object->vo_size != size ||
2154                object->purgable == VM_PURGABLE_DENY))
2155              || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
2156                 return KERN_INVALID_ARGUMENT;
2157
2158         if (!anywhere && overwrite) {
2159                 /*
2160                  * Create a temporary VM map to hold the old mappings in the
2161                  * affected area while we create the new one.
2162                  * This avoids releasing the VM map lock in
2163                  * vm_map_entry_delete() and allows atomicity
2164                  * when we want to replace some mappings with a new one.
2165                  * It also allows us to restore the old VM mappings if the
2166                  * new mapping fails.
2167                  */
2168                 zap_old_map = vm_map_create(PMAP_NULL,
2169                                             *address,
2170                                             *address + size,
2171                                             map->hdr.entries_pageable);
2172                 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
2173                 vm_map_disable_hole_optimization(zap_old_map);
2174         }
2175
2176 StartAgain: ;
2177
2178         start = *address;
2179
2180         if (anywhere) {
2181                 vm_map_lock(map);
2182                 map_locked = TRUE;
2183
2184                 if (entry_for_jit) {
2185                         if (map->jit_entry_exists) {
2186                                 result = KERN_INVALID_ARGUMENT;
2187                                 goto BailOut;
2188                         }
2189                         random_address = TRUE;
2190                 }
2191
2192                 if (random_address) {
2193                         /*
2194                          * Get a random start address.
2195                          */
2196                         result = vm_map_random_address_for_size(map, address, size);
2197                         if (result != KERN_SUCCESS) {
2198                                 goto BailOut;
2199                         }
2200                         start = *address;
2201                 }
2202 #if __x86_64__
2203                 else if ((start == 0 || start == vm_map_min(map)) &&
2204                          !map->disable_vmentry_reuse &&
2205                          map->vmmap_high_start != 0) {
2206                         start = map->vmmap_high_start;
2207                 }
2208 #endif /* __x86_64__ */
2209
2210
2211                 /*
2212                  *      Calculate the first possible address.
2213                  */
2214
2215                 if (start < effective_min_offset)
2216                         start = effective_min_offset;
2217                 if (start > effective_max_offset)
2218                         RETURN(KERN_NO_SPACE);
2219
2220                 /*
2221                  *      Look for the first possible address;
2222                  *      if there's already something at this
2223                  *      address, we have to start after it.
2224                  */
2225
2226                 if( map->disable_vmentry_reuse == TRUE) {
2227                         VM_MAP_HIGHEST_ENTRY(map, entry, start);
2228                 } else {
2229
2230                         if (map->holelistenabled) {
2231                                 hole_entry = (vm_map_entry_t)map->holes_list;
2232
2233                                 if (hole_entry == NULL) {
2234                                         /*
2235                                          * No more space in the map?
2236                                          */
2237                                         result = KERN_NO_SPACE;
2238                                         goto BailOut;
2239                                 } else {
2240
2241                                         boolean_t found_hole = FALSE;
2242
2243                                         do {
2244                                                 if (hole_entry->vme_start >= start) {
2245                                                         start = hole_entry->vme_start;
2246                                                         found_hole = TRUE;
2247                                                         break;
2248                                                 }
2249
2250                                                 if (hole_entry->vme_end > start) {
2251                                                         found_hole = TRUE;
2252                                                         break;
2253                                                 }
2254                                                 hole_entry = hole_entry->vme_next;
2255
2256                                         } while (hole_entry != (vm_map_entry_t) map->holes_list);
2257
2258                                         if (found_hole == FALSE) {
2259                                                 result = KERN_NO_SPACE;
2260                                                 goto BailOut;
2261                                         }
2262
2263                                         entry = hole_entry;
2264
2265                                         if (start == 0)
2266                                                 start += PAGE_SIZE_64;
2267                                 }
2268                         } else {
2269                                 assert(first_free_is_valid(map));
2270
2271                                 entry = map->first_free;
2272
2273                                 if (entry == vm_map_to_entry(map)) {
2274                                         entry = NULL;
2275                                 } else {
2276                                        if (entry->vme_next == vm_map_to_entry(map)){
2277                                                /*
2278                                                 * Hole at the end of the map.
2279                                                 */
2280                                                 entry = NULL;
2281                                        } else {
2282                                                 if (start < (entry->vme_next)->vme_start ) {
2283                                                         start = entry->vme_end;
2284                                                         start = vm_map_round_page(start,
2285                                                                                   VM_MAP_PAGE_MASK(map));
2286                                                 } else {
2287                                                         /*
2288                                                          * Need to do a lookup.
2289                                                          */
2290                                                         entry = NULL;
2291                                                 }
2292                                        }
2293                                 }
2294
2295                                 if (entry == NULL) {
2296                                         vm_map_entry_t  tmp_entry;
2297                                         if (vm_map_lookup_entry(map, start, &tmp_entry)) {
2298                                                 assert(!entry_for_jit);
2299                                                 start = tmp_entry->vme_end;
2300                                                 start = vm_map_round_page(start,
2301                                                                           VM_MAP_PAGE_MASK(map));
2302                                         }
2303                                         entry = tmp_entry;
2304                                 }
2305                         }
2306                 }
2307
2308                 /*
2309                  *      In any case, the "entry" always precedes
2310                  *      the proposed new region throughout the
2311                  *      loop:
2312                  */
2313
2314                 while (TRUE) {
2315                         vm_map_entry_t  next;
2316
2317                         /*
2318                          *      Find the end of the proposed new region.
2319                          *      Be sure we didn't go beyond the end, or
2320                          *      wrap around the address.
2321                          */
2322
2323                         end = ((start + mask) & ~mask);
2324                         end = vm_map_round_page(end,
2325                                                 VM_MAP_PAGE_MASK(map));
2326                         if (end < start)
2327                                 RETURN(KERN_NO_SPACE);
2328                         start = end;
2329                         assert(VM_MAP_PAGE_ALIGNED(start,
2330                                                    VM_MAP_PAGE_MASK(map)));
2331                         end += size;
2332
2333                         if ((end > effective_max_offset) || (end < start)) {
2334                                 if (map->wait_for_space) {
2335                                         assert(!keep_map_locked);
2336                                         if (size <= (effective_max_offset -
2337                                                      effective_min_offset)) {
2338                                                 assert_wait((event_t)map,
2339                                                             THREAD_ABORTSAFE);
2340                                                 vm_map_unlock(map);
2341                                                 map_locked = FALSE;
2342                                                 thread_block(THREAD_CONTINUE_NULL);
2343                                                 goto StartAgain;
2344                                         }
2345                                 }
2346                                 RETURN(KERN_NO_SPACE);
2347                         }
2348
2349                         next = entry->vme_next;
2350
2351                         if (map->holelistenabled) {
2352                                 if (entry->vme_end >= end)
2353                                         break;
2354                         } else {
2355                                 /*
2356                                  *      If there are no more entries, we must win.
2357                                  *
2358                                  *      OR
2359                                  *
2360                                  *      If there is another entry, it must be
2361                                  *      after the end of the potential new region.
2362                                  */
2363
2364                                 if (next == vm_map_to_entry(map))
2365                                         break;
2366
2367                                 if (next->vme_start >= end)
2368                                         break;
2369                         }
2370
2371                         /*
2372                          *      Didn't fit -- move to the next entry.
2373                          */
2374
2375                         entry = next;
2376
2377                         if (map->holelistenabled) {
2378                                 if (entry == (vm_map_entry_t) map->holes_list) {
2379                                         /*
2380                                          * Wrapped around
2381                                          */
2382                                         result = KERN_NO_SPACE;
2383                                         goto BailOut;
2384                                 }
2385                                 start = entry->vme_start;
2386                         } else {
2387                                 start = entry->vme_end;
2388                         }
2389
2390                         start = vm_map_round_page(start,
2391                                                   VM_MAP_PAGE_MASK(map));
2392                 }
2393
2394                 if (map->holelistenabled) {
2395                         if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
2396                                 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
2397                         }
2398                 }
2399
2400                 *address = start;
2401                 assert(VM_MAP_PAGE_ALIGNED(*address,
2402                                            VM_MAP_PAGE_MASK(map)));
2403         } else {
2404                 /*
2405                  *      Verify that:
2406                  *              the address doesn't itself violate
2407                  *              the mask requirement.
2408                  */
2409
2410                 vm_map_lock(map);
2411                 map_locked = TRUE;
2412                 if ((start & mask) != 0)
2413                         RETURN(KERN_NO_SPACE);
2414
2415                 /*
2416                  *      ...     the address is within bounds
2417                  */
2418
2419                 end = start + size;
2420
2421                 if ((start < effective_min_offset) ||
2422                     (end > effective_max_offset) ||
2423                     (start >= end)) {
2424                         RETURN(KERN_INVALID_ADDRESS);
2425                 }
2426
2427                 if (overwrite && zap_old_map != VM_MAP_NULL) {
2428                         int remove_flags;
2429                         /*
2430                          * Fixed mapping and "overwrite" flag: attempt to
2431                          * remove all existing mappings in the specified
2432                          * address range, saving them in our "zap_old_map".
2433                          */
2434                         remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES;
2435                         remove_flags |= VM_MAP_REMOVE_NO_MAP_ALIGN;
2436                         if (vmk_flags.vmkf_overwrite_immutable) {
2437                                 /* we can overwrite immutable mappings */
2438                                 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
2439                         }
2440                         (void) vm_map_delete(map, start, end,
2441                                              remove_flags,
2442                                              zap_old_map);
2443                 }
2444
2445                 /*
2446                  *      ...     the starting address isn't allocated
2447                  */
2448
2449                 if (vm_map_lookup_entry(map, start, &entry)) {
2450                         if (! (vmk_flags.vmkf_already)) {
2451                                 RETURN(KERN_NO_SPACE);
2452                         }
2453                         /*
2454                          * Check if what's already there is what we want.
2455                          */
2456                         tmp_start = start;
2457                         tmp_offset = offset;
2458                         if (entry->vme_start < start) {
2459                                 tmp_start -= start - entry->vme_start;
2460                                 tmp_offset -= start - entry->vme_start;
2461
2462                         }
2463                         for (; entry->vme_start < end;
2464                              entry = entry->vme_next) {
2465                                 /*
2466                                  * Check if the mapping's attributes
2467                                  * match the existing map entry.
2468                                  */
2469                                 if (entry == vm_map_to_entry(map) ||
2470                                     entry->vme_start != tmp_start ||
2471                                     entry->is_sub_map != is_submap ||
2472                                     VME_OFFSET(entry) != tmp_offset ||
2473                                     entry->needs_copy != needs_copy ||
2474                                     entry->protection != cur_protection ||
2475                                     entry->max_protection != max_protection ||
2476                                     entry->inheritance != inheritance ||
2477                                     entry->iokit_acct != iokit_acct ||
2478                                     VME_ALIAS(entry) != alias) {
2479                                         /* not the same mapping ! */
2480                                         RETURN(KERN_NO_SPACE);
2481                                 }
2482                                 /*
2483                                  * Check if the same object is being mapped.
2484                                  */
2485                                 if (is_submap) {
2486                                         if (VME_SUBMAP(entry) !=
2487                                             (vm_map_t) object) {
2488                                                 /* not the same submap */
2489                                                 RETURN(KERN_NO_SPACE);
2490                                         }
2491                                 } else {
2492                                         if (VME_OBJECT(entry) != object) {
2493                                                 /* not the same VM object... */
2494                                                 vm_object_t obj2;
2495
2496                                                 obj2 = VME_OBJECT(entry);
2497                                                 if ((obj2 == VM_OBJECT_NULL ||
2498                                                      obj2->internal) &&
2499                                                     (object == VM_OBJECT_NULL ||
2500                                                      object->internal)) {
2501                                                         /*
2502                                                          * ... but both are
2503                                                          * anonymous memory,
2504                                                          * so equivalent.
2505                                                          */
2506                                                 } else {
2507                                                         RETURN(KERN_NO_SPACE);
2508                                                 }
2509                                         }
2510                                 }
2511
2512                                 tmp_offset += entry->vme_end - entry->vme_start;
2513                                 tmp_start += entry->vme_end - entry->vme_start;
2514                                 if (entry->vme_end >= end) {
2515                                         /* reached the end of our mapping */
2516                                         break;
2517                                 }
2518                         }
2519                         /* it all matches:  let's use what's already there ! */
2520                         RETURN(KERN_MEMORY_PRESENT);
2521                 }
2522
2523                 /*
2524                  *      ...     the next region doesn't overlap the
2525                  *              end point.
2526                  */
2527
2528                 if ((entry->vme_next != vm_map_to_entry(map)) &&
2529                     (entry->vme_next->vme_start < end))
2530                         RETURN(KERN_NO_SPACE);
2531         }
2532
2533         /*
2534          *      At this point,
2535          *              "start" and "end" should define the endpoints of the
2536          *                      available new range, and
2537          *              "entry" should refer to the region before the new
2538          *                      range, and
2539          *
2540          *              the map should be locked.
2541          */
2542
2543         /*
2544          *      See whether we can avoid creating a new entry (and object) by
2545          *      extending one of our neighbors.  [So far, we only attempt to
2546          *      extend from below.]  Note that we can never extend/join
2547          *      purgable objects because they need to remain distinct
2548          *      entities in order to implement their "volatile object"
2549          *      semantics.
2550          */
2551
2552         if (purgable || entry_for_jit) {
2553                 if (object == VM_OBJECT_NULL) {
2554
2555                         object = vm_object_allocate(size);
2556                         object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
2557                         object->true_share = TRUE;
2558                         if (purgable) {
2559                                 task_t owner;
2560                                 object->purgable = VM_PURGABLE_NONVOLATILE;
2561                                 if (map->pmap == kernel_pmap) {
2562                                         /*
2563                                          * Purgeable mappings made in a kernel
2564                                          * map are "owned" by the kernel itself
2565                                          * rather than the current user task
2566                                          * because they're likely to be used by
2567                                          * more than this user task (see
2568                                          * execargs_purgeable_allocate(), for
2569                                          * example).
2570                                          */
2571                                         owner = kernel_task;
2572                                 } else {
2573                                         owner = current_task();
2574                                 }
2575                                 assert(object->vo_purgeable_owner == NULL);
2576                                 assert(object->resident_page_count == 0);
2577                                 assert(object->wired_page_count == 0);
2578                                 vm_object_lock(object);
2579                                 vm_purgeable_nonvolatile_enqueue(object, owner);
2580                                 vm_object_unlock(object);
2581                         }
2582                         offset = (vm_object_offset_t)0;
2583                 }
2584         } else if ((is_submap == FALSE) &&
2585                    (object == VM_OBJECT_NULL) &&
2586                    (entry != vm_map_to_entry(map)) &&
2587                    (entry->vme_end == start) &&
2588                    (!entry->is_shared) &&
2589                    (!entry->is_sub_map) &&
2590                    (!entry->in_transition) &&
2591                    (!entry->needs_wakeup) &&
2592                    (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
2593                    (entry->protection == cur_protection) &&
2594                    (entry->max_protection == max_protection) &&
2595                    (entry->inheritance == inheritance) &&
2596                    ((user_alias == VM_MEMORY_REALLOC) ||
2597                     (VME_ALIAS(entry) == alias)) &&
2598                    (entry->no_cache == no_cache) &&
2599                    (entry->permanent == permanent) &&
2600                    /* no coalescing for immutable executable mappings */
2601                    !((entry->protection & VM_PROT_EXECUTE) &&
2602                      entry->permanent) &&
2603                    (!entry->superpage_size && !superpage_size) &&
2604                    /*
2605                     * No coalescing if not map-aligned, to avoid propagating
2606                     * that condition any further than needed:
2607                     */
2608                    (!entry->map_aligned || !clear_map_aligned) &&
2609                    (!entry->zero_wired_pages) &&
2610                    (!entry->used_for_jit && !entry_for_jit) &&
2611                    (entry->iokit_acct == iokit_acct) &&
2612                    (!entry->vme_resilient_codesign) &&
2613                    (!entry->vme_resilient_media) &&
2614                    (!entry->vme_atomic) &&
2615
2616                    ((entry->vme_end - entry->vme_start) + size <=
2617                     (user_alias == VM_MEMORY_REALLOC ?
2618                      ANON_CHUNK_SIZE :
2619                      NO_COALESCE_LIMIT)) &&
2620
2621                    (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
2622                 if (vm_object_coalesce(VME_OBJECT(entry),
2623                                        VM_OBJECT_NULL,
2624                                        VME_OFFSET(entry),
2625                                        (vm_object_offset_t) 0,
2626                                        (vm_map_size_t)(entry->vme_end - entry->vme_start),
2627                                        (vm_map_size_t)(end - entry->vme_end))) {
2628
2629                         /*
2630                          *      Coalesced the two objects - can extend
2631                          *      the previous map entry to include the
2632                          *      new range.
2633                          */
2634                         map->size += (end - entry->vme_end);
2635                         assert(entry->vme_start < end);
2636                         assert(VM_MAP_PAGE_ALIGNED(end,
2637                                                    VM_MAP_PAGE_MASK(map)));
2638                         if (__improbable(vm_debug_events))
2639                                 DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
2640                         entry->vme_end = end;
2641                         if (map->holelistenabled) {
2642                                 vm_map_store_update_first_free(map, entry, TRUE);
2643                         } else {
2644                                 vm_map_store_update_first_free(map, map->first_free, TRUE);
2645                         }
2646                         new_mapping_established = TRUE;
2647                         RETURN(KERN_SUCCESS);
2648                 }
2649         }
2650
2651         step = superpage_size ? SUPERPAGE_SIZE : (end - start);
2652         new_entry = NULL;
2653
2654         for (tmp2_start = start; tmp2_start<end; tmp2_start += step) {
2655                 tmp2_end = tmp2_start + step;
2656                 /*
2657                  *      Create a new entry
2658                  *      LP64todo - for now, we can only allocate 4GB internal objects
2659                  *      because the default pager can't page bigger ones.  Remove this
2660                  *      when it can.
2661                  *
2662                  * XXX FBDP
2663                  * The reserved "page zero" in each process's address space can
2664                  * be arbitrarily large.  Splitting it into separate 4GB objects and
2665                  * therefore different VM map entries serves no purpose and just
2666                  * slows down operations on the VM map, so let's not split the
2667                  * allocation into 4GB chunks if the max protection is NONE.  That
2668                  * memory should never be accessible, so it will never get to the
2669                  * default pager.
2670                  */
2671                 tmp_start = tmp2_start;
2672                 if (object == VM_OBJECT_NULL &&
2673                     size > (vm_map_size_t)ANON_CHUNK_SIZE &&
2674                     max_protection != VM_PROT_NONE &&
2675                     superpage_size == 0)
2676                         tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE;
2677                 else
2678                         tmp_end = tmp2_end;
2679                 do {
2680                         new_entry = vm_map_entry_insert(
2681                                 map, entry, tmp_start, tmp_end,
2682                                 object, offset, needs_copy,
2683                                 FALSE, FALSE,
2684                                 cur_protection, max_protection,
2685                                 VM_BEHAVIOR_DEFAULT,
2686                                 (entry_for_jit)? VM_INHERIT_NONE: inheritance,
2687                                 0,
2688                                 no_cache,
2689                                 permanent,
2690                                 superpage_size,
2691                                 clear_map_aligned,
2692                                 is_submap,
2693                                 entry_for_jit,
2694                                 alias);
2695
2696                         assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
2697
2698                         if (resilient_codesign &&
2699                             ! ((cur_protection | max_protection) &
2700                                (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2701                                 new_entry->vme_resilient_codesign = TRUE;
2702                         }
2703
2704                         if (resilient_media &&
2705                             ! ((cur_protection | max_protection) &
2706                                (VM_PROT_WRITE | VM_PROT_EXECUTE))) {
2707                                 new_entry->vme_resilient_media = TRUE;
2708                         }
2709
2710                         assert(!new_entry->iokit_acct);
2711                         if (!is_submap &&
2712                             object != VM_OBJECT_NULL &&
2713                             object->purgable != VM_PURGABLE_DENY) {
2714                                 assert(new_entry->use_pmap);
2715                                 assert(!new_entry->iokit_acct);
2716                                 /*
2717                                  * Turn off pmap accounting since
2718                                  * purgeable objects have their
2719                                  * own ledgers.
2720                                  */
2721                                 new_entry->use_pmap = FALSE;
2722                         } else if (!is_submap &&
2723                                    iokit_acct &&
2724                                    object != VM_OBJECT_NULL &&
2725                                    object->internal) {
2726                                 /* alternate accounting */
2727                                 assert(!new_entry->iokit_acct);
2728                                 assert(new_entry->use_pmap);
2729                                 new_entry->iokit_acct = TRUE;
2730                                 new_entry->use_pmap = FALSE;
2731                                 DTRACE_VM4(
2732                                         vm_map_iokit_mapped_region,
2733                                         vm_map_t, map,
2734                                         vm_map_offset_t, new_entry->vme_start,
2735                                         vm_map_offset_t, new_entry->vme_end,
2736                                         int, VME_ALIAS(new_entry));
2737                                 vm_map_iokit_mapped_region(
2738                                         map,
2739                                         (new_entry->vme_end -
2740                                          new_entry->vme_start));
2741                         } else if (!is_submap) {
2742                                 assert(!new_entry->iokit_acct);
2743                                 assert(new_entry->use_pmap);
2744                         }
2745
2746                         if (is_submap) {
2747                                 vm_map_t        submap;
2748                                 boolean_t       submap_is_64bit;
2749                                 boolean_t       use_pmap;
2750
2751                                 assert(new_entry->is_sub_map);
2752                                 assert(!new_entry->use_pmap);
2753                                 assert(!new_entry->iokit_acct);
2754                                 submap = (vm_map_t) object;
2755                                 submap_is_64bit = vm_map_is_64bit(submap);
2756                                 use_pmap = (user_alias == VM_MEMORY_SHARED_PMAP);
2757 #ifndef NO_NESTED_PMAP
2758                                 if (use_pmap && submap->pmap == NULL) {
2759                                         ledger_t ledger = map->pmap->ledger;
2760                                         /* we need a sub pmap to nest... */
2761                                         submap->pmap = pmap_create(ledger, 0,
2762                                             submap_is_64bit);
2763                                         if (submap->pmap == NULL) {
2764                                                 /* let's proceed without nesting... */
2765                                         }
2766 #if     defined(__arm__) || defined(__arm64__)
2767                                         else {
2768                                                 pmap_set_nested(submap->pmap);
2769                                         }
2770 #endif
2771                                 }
2772                                 if (use_pmap && submap->pmap != NULL) {
2773                                         kr = pmap_nest(map->pmap,
2774                                                        submap->pmap,
2775                                                        tmp_start,
2776                                                        tmp_start,
2777                                                        tmp_end - tmp_start);
2778                                         if (kr != KERN_SUCCESS) {
2779                                                 printf("vm_map_enter: "
2780                                                        "pmap_nest(0x%llx,0x%llx) "
2781                                                        "error 0x%x\n",
2782                                                        (long long)tmp_start,
2783                                                        (long long)tmp_end,
2784                                                        kr);
2785                                         } else {
2786                                                 /* we're now nested ! */
2787                                                 new_entry->use_pmap = TRUE;
2788                                                 pmap_empty = FALSE;
2789                                         }
2790                                 }
2791 #endif /* NO_NESTED_PMAP */
2792                         }
2793                         entry = new_entry;
2794
2795                         if (superpage_size) {
2796                                 vm_page_t pages, m;
2797                                 vm_object_t sp_object;
2798                                 vm_object_offset_t sp_offset;
2799
2800                                 VME_OFFSET_SET(entry, 0);
2801
2802                                 /* allocate one superpage */
2803                                 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0);
2804                                 if (kr != KERN_SUCCESS) {
2805                                         /* deallocate whole range... */
2806                                         new_mapping_established = TRUE;
2807                                         /* ... but only up to "tmp_end" */
2808                                         size -= end - tmp_end;
2809                                         RETURN(kr);
2810                                 }
2811
2812                                 /* create one vm_object per superpage */
2813                                 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
2814                                 sp_object->phys_contiguous = TRUE;
2815                                 sp_object->vo_shadow_offset = (vm_object_offset_t)VM_PAGE_GET_PHYS_PAGE(pages)*PAGE_SIZE;
2816                                 VME_OBJECT_SET(entry, sp_object);
2817                                 assert(entry->use_pmap);
2818
2819                                 /* enter the base pages into the object */
2820                                 vm_object_lock(sp_object);
2821                                 for (sp_offset = 0;
2822                                      sp_offset < SUPERPAGE_SIZE;
2823                                      sp_offset += PAGE_SIZE) {
2824                                         m = pages;
2825                                         pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
2826                                         pages = NEXT_PAGE(m);
2827                                         *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
2828                                         vm_page_insert_wired(m, sp_object, sp_offset, VM_KERN_MEMORY_OSFMK);
2829                                 }
2830                                 vm_object_unlock(sp_object);
2831                         }
2832                 } while (tmp_end != tmp2_end &&
2833                          (tmp_start = tmp_end) &&
2834                          (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ?
2835                           tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end));
2836         }
2837
2838         new_mapping_established = TRUE;
2839
2840 BailOut:
2841         assert(map_locked == TRUE);
2842
2843         if (result == KERN_SUCCESS) {
2844                 vm_prot_t pager_prot;
2845                 memory_object_t pager;
2846
2847 #if DEBUG
2848                 if (pmap_empty &&
2849                     !(vmk_flags.vmkf_no_pmap_check)) {
2850                         assert(vm_map_pmap_is_empty(map,
2851                                                     *address,
2852                                                     *address+size));
2853                 }
2854 #endif /* DEBUG */
2855
2856                 /*
2857                  * For "named" VM objects, let the pager know that the
2858                  * memory object is being mapped.  Some pagers need to keep
2859                  * track of this, to know when they can reclaim the memory
2860                  * object, for example.
2861                  * VM calls memory_object_map() for each mapping (specifying
2862                  * the protection of each mapping) and calls
2863                  * memory_object_last_unmap() when all the mappings are gone.
2864                  */
2865                 pager_prot = max_protection;
2866                 if (needs_copy) {
2867                         /*
2868                          * Copy-On-Write mapping: won't modify
2869                          * the memory object.
2870                          */
2871                         pager_prot &= ~VM_PROT_WRITE;
2872                 }
2873                 if (!is_submap &&
2874                     object != VM_OBJECT_NULL &&
2875                     object->named &&
2876                     object->pager != MEMORY_OBJECT_NULL) {
2877                         vm_object_lock(object);
2878                         pager = object->pager;
2879                         if (object->named &&
2880                             pager != MEMORY_OBJECT_NULL) {
2881                                 assert(object->pager_ready);
2882                                 vm_object_mapping_wait(object, THREAD_UNINT);
2883                                 vm_object_mapping_begin(object);
2884                                 vm_object_unlock(object);
2885
2886                                 kr = memory_object_map(pager, pager_prot);
2887                                 assert(kr == KERN_SUCCESS);
2888
2889                                 vm_object_lock(object);
2890                                 vm_object_mapping_end(object);
2891                         }
2892                         vm_object_unlock(object);
2893                 }
2894         }
2895
2896         assert(map_locked == TRUE);
2897
2898         if (!keep_map_locked) {
2899                 vm_map_unlock(map);
2900                 map_locked = FALSE;
2901         }
2902
2903         /*
2904          * We can't hold the map lock if we enter this block.
2905          */
2906
2907         if (result == KERN_SUCCESS) {
2908
2909                 /*      Wire down the new entry if the user
2910                  *      requested all new map entries be wired.
2911                  */
2912                 if ((map->wiring_required)||(superpage_size)) {
2913                         assert(!keep_map_locked);
2914                         pmap_empty = FALSE; /* pmap won't be empty */
2915                         kr = vm_map_wire_kernel(map, start, end,
2916                                              new_entry->protection, VM_KERN_MEMORY_MLOCK,
2917                                              TRUE);
2918                         result = kr;
2919                 }
2920
2921         }
2922
2923         if (result != KERN_SUCCESS) {
2924                 if (new_mapping_established) {
2925                         /*
2926                          * We have to get rid of the new mappings since we
2927                          * won't make them available to the user.
2928                          * Try and do that atomically, to minimize the risk
2929                          * that someone else creates new mappings in that range.
2930                          */
2931                         zap_new_map = vm_map_create(PMAP_NULL,
2932                                                     *address,
2933                                                     *address + size,
2934                                                     map->hdr.entries_pageable);
2935                         vm_map_set_page_shift(zap_new_map,
2936                                               VM_MAP_PAGE_SHIFT(map));
2937                         vm_map_disable_hole_optimization(zap_new_map);
2938
2939                         if (!map_locked) {
2940                                 vm_map_lock(map);
2941                                 map_locked = TRUE;
2942                         }
2943                         (void) vm_map_delete(map, *address, *address+size,
2944                                              (VM_MAP_REMOVE_SAVE_ENTRIES |
2945                                               VM_MAP_REMOVE_NO_MAP_ALIGN),
2946                                              zap_new_map);
2947                 }
2948                 if (zap_old_map != VM_MAP_NULL &&
2949                     zap_old_map->hdr.nentries != 0) {
2950                         vm_map_entry_t  entry1, entry2;
2951
2952                         /*
2953                          * The new mapping failed.  Attempt to restore
2954                          * the old mappings, saved in the "zap_old_map".
2955                          */
2956                         if (!map_locked) {
2957                                 vm_map_lock(map);
2958                                 map_locked = TRUE;
2959                         }
2960
2961                         /* first check if the coast is still clear */
2962                         start = vm_map_first_entry(zap_old_map)->vme_start;
2963                         end = vm_map_last_entry(zap_old_map)->vme_end;
2964                         if (vm_map_lookup_entry(map, start, &entry1) ||
2965                             vm_map_lookup_entry(map, end, &entry2) ||
2966                             entry1 != entry2) {
2967                                 /*
2968                                  * Part of that range has already been
2969                                  * re-mapped:  we can't restore the old
2970                                  * mappings...
2971                                  */
2972                                 vm_map_enter_restore_failures++;
2973                         } else {
2974                                 /*
2975                                  * Transfer the saved map entries from
2976                                  * "zap_old_map" to the original "map",
2977                                  * inserting them all after "entry1".
2978                                  */
2979                                 for (entry2 = vm_map_first_entry(zap_old_map);
2980                                      entry2 != vm_map_to_entry(zap_old_map);
2981                                      entry2 = vm_map_first_entry(zap_old_map)) {
2982                                         vm_map_size_t entry_size;
2983
2984                                         entry_size = (entry2->vme_end -
2985                                                       entry2->vme_start);
2986                                         vm_map_store_entry_unlink(zap_old_map,
2987                                                             entry2);
2988                                         zap_old_map->size -= entry_size;
2989                                         vm_map_store_entry_link(map, entry1, entry2);
2990                                         map->size += entry_size;
2991                                         entry1 = entry2;
2992                                 }
2993                                 if (map->wiring_required) {
2994                                         /*
2995                                          * XXX TODO: we should rewire the
2996                                          * old pages here...
2997                                          */
2998                                 }
2999                                 vm_map_enter_restore_successes++;
3000                         }
3001                 }
3002         }
3003
3004         /*
3005          * The caller is responsible for releasing the lock if it requested to
3006          * keep the map locked.
3007          */
3008         if (map_locked && !keep_map_locked) {
3009                 vm_map_unlock(map);
3010         }
3011
3012         /*
3013          * Get rid of the "zap_maps" and all the map entries that
3014          * they may still contain.
3015          */
3016         if (zap_old_map != VM_MAP_NULL) {
3017                 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3018                 zap_old_map = VM_MAP_NULL;
3019         }
3020         if (zap_new_map != VM_MAP_NULL) {
3021                 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3022                 zap_new_map = VM_MAP_NULL;
3023         }
3024
3025         return result;
3026
3027 #undef  RETURN
3028 }
3029
3030 #if __arm64__
3031 extern const struct memory_object_pager_ops fourk_pager_ops;
3032 kern_return_t
3033 vm_map_enter_fourk(
3034         vm_map_t                map,
3035         vm_map_offset_t         *address,       /* IN/OUT */
3036         vm_map_size_t           size,
3037         vm_map_offset_t         mask,
3038         int                     flags,
3039         vm_map_kernel_flags_t   vmk_flags,
3040         vm_tag_t                alias,
3041         vm_object_t             object,
3042         vm_object_offset_t      offset,
3043         boolean_t               needs_copy,
3044         vm_prot_t               cur_protection,
3045         vm_prot_t               max_protection,
3046         vm_inherit_t            inheritance)
3047 {
3048         vm_map_entry_t          entry, new_entry;
3049         vm_map_offset_t         start, fourk_start;
3050         vm_map_offset_t         end, fourk_end;
3051         vm_map_size_t           fourk_size;
3052         kern_return_t           result = KERN_SUCCESS;
3053         vm_map_t                zap_old_map = VM_MAP_NULL;
3054         vm_map_t                zap_new_map = VM_MAP_NULL;
3055         boolean_t               map_locked = FALSE;
3056         boolean_t               pmap_empty = TRUE;
3057         boolean_t               new_mapping_established = FALSE;
3058         boolean_t               keep_map_locked = vmk_flags.vmkf_keep_map_locked;
3059         boolean_t               anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
3060         boolean_t               purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
3061         boolean_t               overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
3062         boolean_t               no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
3063         boolean_t               is_submap = vmk_flags.vmkf_submap;
3064         boolean_t               permanent = vmk_flags.vmkf_permanent;
3065         boolean_t               entry_for_jit = vmk_flags.vmkf_map_jit;
3066 //      boolean_t               iokit_acct = vmk_flags.vmkf_iokit_acct;
3067         unsigned int            superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
3068         vm_map_offset_t         effective_min_offset, effective_max_offset;
3069         kern_return_t           kr;
3070         boolean_t               clear_map_aligned = FALSE;
3071         memory_object_t         fourk_mem_obj;
3072         vm_object_t             fourk_object;
3073         vm_map_offset_t         fourk_pager_offset;
3074         int                     fourk_pager_index_start, fourk_pager_index_num;
3075         int                     cur_idx;
3076         boolean_t               fourk_copy;
3077         vm_object_t             copy_object;
3078         vm_object_offset_t      copy_offset;
3079
3080         fourk_mem_obj = MEMORY_OBJECT_NULL;
3081         fourk_object = VM_OBJECT_NULL;
3082
3083         if (superpage_size) {
3084                 return KERN_NOT_SUPPORTED;
3085         }
3086
3087 #if CONFIG_EMBEDDED
3088         if (cur_protection & VM_PROT_WRITE) {
3089                 if ((cur_protection & VM_PROT_EXECUTE) &&
3090                     !entry_for_jit) {
3091                         printf("EMBEDDED: %s: curprot cannot be write+execute. "
3092                                "turning off execute\n",
3093                                __FUNCTION__);
3094                         cur_protection &= ~VM_PROT_EXECUTE;
3095                 }
3096         }
3097 #endif /* CONFIG_EMBEDDED */
3098
3099         if (is_submap) {
3100                 return KERN_NOT_SUPPORTED;
3101         }
3102         if (vmk_flags.vmkf_already) {
3103                 return KERN_NOT_SUPPORTED;
3104         }
3105         if (purgable || entry_for_jit) {
3106                 return KERN_NOT_SUPPORTED;
3107         }
3108
3109         effective_min_offset = map->min_offset;
3110
3111         if (vmk_flags.vmkf_beyond_max) {
3112                 return KERN_NOT_SUPPORTED;
3113         } else {
3114                 effective_max_offset = map->max_offset;
3115         }
3116
3117         if (size == 0 ||
3118             (offset & FOURK_PAGE_MASK) != 0) {
3119                 *address = 0;
3120                 return KERN_INVALID_ARGUMENT;
3121         }
3122
3123 #define RETURN(value)   { result = value; goto BailOut; }
3124
3125         assert(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK));
3126         assert(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK));
3127
3128         if (!anywhere && overwrite) {
3129                 return KERN_NOT_SUPPORTED;
3130         }
3131         if (!anywhere && overwrite) {
3132                 /*
3133                  * Create a temporary VM map to hold the old mappings in the
3134                  * affected area while we create the new one.
3135                  * This avoids releasing the VM map lock in
3136                  * vm_map_entry_delete() and allows atomicity
3137                  * when we want to replace some mappings with a new one.
3138                  * It also allows us to restore the old VM mappings if the
3139                  * new mapping fails.
3140                  */
3141                 zap_old_map = vm_map_create(PMAP_NULL,
3142                                             *address,
3143                                             *address + size,
3144                                             map->hdr.entries_pageable);
3145                 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
3146                 vm_map_disable_hole_optimization(zap_old_map);
3147         }
3148
3149         fourk_start = *address;
3150         fourk_size = size;
3151         fourk_end = fourk_start + fourk_size;
3152
3153         start = vm_map_trunc_page(*address, VM_MAP_PAGE_MASK(map));
3154         end = vm_map_round_page(fourk_end, VM_MAP_PAGE_MASK(map));
3155         size = end - start;
3156
3157         if (anywhere) {
3158                 return KERN_NOT_SUPPORTED;
3159         } else {
3160                 /*
3161                  *      Verify that:
3162                  *              the address doesn't itself violate
3163                  *              the mask requirement.
3164                  */
3165
3166                 vm_map_lock(map);
3167                 map_locked = TRUE;
3168                 if ((start & mask) != 0) {
3169                         RETURN(KERN_NO_SPACE);
3170                 }
3171
3172                 /*
3173                  *      ...     the address is within bounds
3174                  */
3175
3176                 end = start + size;
3177
3178                 if ((start < effective_min_offset) ||
3179                     (end > effective_max_offset) ||
3180                     (start >= end)) {
3181                         RETURN(KERN_INVALID_ADDRESS);
3182                 }
3183
3184                 if (overwrite && zap_old_map != VM_MAP_NULL) {
3185                         /*
3186                          * Fixed mapping and "overwrite" flag: attempt to
3187                          * remove all existing mappings in the specified
3188                          * address range, saving them in our "zap_old_map".
3189                          */
3190                         (void) vm_map_delete(map, start, end,
3191                                              (VM_MAP_REMOVE_SAVE_ENTRIES |
3192                                               VM_MAP_REMOVE_NO_MAP_ALIGN),
3193                                              zap_old_map);
3194                 }
3195
3196                 /*
3197                  *      ...     the starting address isn't allocated
3198                  */
3199                 if (vm_map_lookup_entry(map, start, &entry)) {
3200                         vm_object_t cur_object, shadow_object;
3201
3202                         /*
3203                          * We might already have some 4K mappings
3204                          * in a 16K page here.
3205                          */
3206
3207                         if (entry->vme_end - entry->vme_start
3208                             != SIXTEENK_PAGE_SIZE) {
3209                                 RETURN(KERN_NO_SPACE);
3210                         }
3211                         if (entry->is_sub_map) {
3212                                 RETURN(KERN_NO_SPACE);
3213                         }
3214                         if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
3215                                 RETURN(KERN_NO_SPACE);
3216                         }
3217
3218                         /* go all the way down the shadow chain */
3219                         cur_object = VME_OBJECT(entry);
3220                         vm_object_lock(cur_object);
3221                         while (cur_object->shadow != VM_OBJECT_NULL) {
3222                                 shadow_object = cur_object->shadow;
3223                                 vm_object_lock(shadow_object);
3224                                 vm_object_unlock(cur_object);
3225                                 cur_object = shadow_object;
3226                                 shadow_object = VM_OBJECT_NULL;
3227                         }
3228                         if (cur_object->internal ||
3229                             cur_object->pager == NULL) {
3230                                 vm_object_unlock(cur_object);
3231                                 RETURN(KERN_NO_SPACE);
3232                         }
3233                         if (cur_object->pager->mo_pager_ops
3234                             != &fourk_pager_ops) {
3235                                 vm_object_unlock(cur_object);
3236                                 RETURN(KERN_NO_SPACE);
3237                         }
3238                         fourk_object = cur_object;
3239                         fourk_mem_obj = fourk_object->pager;
3240
3241                         /* keep the "4K" object alive */
3242                         vm_object_reference_locked(fourk_object);
3243                         vm_object_unlock(fourk_object);
3244
3245                         /* merge permissions */
3246                         entry->protection |= cur_protection;
3247                         entry->max_protection |= max_protection;
3248                         if ((entry->protection & (VM_PROT_WRITE |
3249                                                   VM_PROT_EXECUTE)) ==
3250                             (VM_PROT_WRITE | VM_PROT_EXECUTE) &&
3251                             fourk_binary_compatibility_unsafe &&
3252                             fourk_binary_compatibility_allow_wx) {
3253                                 /* write+execute: need to be "jit" */
3254                                 entry->used_for_jit = TRUE;
3255                         }
3256
3257                         goto map_in_fourk_pager;
3258                 }
3259
3260                 /*
3261                  *      ...     the next region doesn't overlap the
3262                  *              end point.
3263                  */
3264
3265                 if ((entry->vme_next != vm_map_to_entry(map)) &&
3266                     (entry->vme_next->vme_start < end)) {
3267                         RETURN(KERN_NO_SPACE);
3268                 }
3269         }
3270
3271         /*
3272          *      At this point,
3273          *              "start" and "end" should define the endpoints of the
3274          *                      available new range, and
3275          *              "entry" should refer to the region before the new
3276          *                      range, and
3277          *
3278          *              the map should be locked.
3279          */
3280
3281         /* create a new "4K" pager */
3282         fourk_mem_obj = fourk_pager_create();
3283         fourk_object = fourk_pager_to_vm_object(fourk_mem_obj);
3284         assert(fourk_object);
3285
3286         /* keep the "4K" object alive */
3287         vm_object_reference(fourk_object);
3288
3289         /* create a "copy" object, to map the "4K" object copy-on-write */
3290         fourk_copy = TRUE;
3291         result = vm_object_copy_strategically(fourk_object,
3292                                               0,
3293                                               end - start,
3294                                               &copy_object,
3295                                               &copy_offset,
3296                                               &fourk_copy);
3297         assert(result == KERN_SUCCESS);
3298         assert(copy_object != VM_OBJECT_NULL);
3299         assert(copy_offset == 0);
3300
3301         /* take a reference on the copy object, for this mapping */
3302         vm_object_reference(copy_object);
3303
3304         /* map the "4K" pager's copy object */
3305         new_entry =
3306                 vm_map_entry_insert(map, entry,
3307                                     vm_map_trunc_page(start,
3308                                                       VM_MAP_PAGE_MASK(map)),
3309                                     vm_map_round_page(end,
3310                                                       VM_MAP_PAGE_MASK(map)),
3311                                     copy_object,
3312                                     0, /* offset */
3313                                     FALSE, /* needs_copy */
3314                                     FALSE, FALSE,
3315                                     cur_protection, max_protection,
3316                                     VM_BEHAVIOR_DEFAULT,
3317                                     ((entry_for_jit)
3318                                      ? VM_INHERIT_NONE
3319                                      : inheritance),
3320                                     0,
3321                                     no_cache,
3322                                     permanent,
3323                                     superpage_size,
3324                                     clear_map_aligned,
3325                                     is_submap,
3326                                     FALSE, /* jit */
3327                                     alias);
3328         entry = new_entry;
3329
3330 #if VM_MAP_DEBUG_FOURK
3331         if (vm_map_debug_fourk) {
3332                 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] new pager %p\n",
3333                        map,
3334                        (uint64_t) entry->vme_start,
3335                        (uint64_t) entry->vme_end,
3336                        fourk_mem_obj);
3337         }
3338 #endif /* VM_MAP_DEBUG_FOURK */
3339
3340         new_mapping_established = TRUE;
3341
3342 map_in_fourk_pager:
3343         /* "map" the original "object" where it belongs in the "4K" pager */
3344         fourk_pager_offset = (fourk_start & SIXTEENK_PAGE_MASK);
3345         fourk_pager_index_start = (int) (fourk_pager_offset / FOURK_PAGE_SIZE);
3346         if (fourk_size > SIXTEENK_PAGE_SIZE) {
3347                 fourk_pager_index_num = 4;
3348         } else {
3349                 fourk_pager_index_num = (int) (fourk_size / FOURK_PAGE_SIZE);
3350         }
3351         if (fourk_pager_index_start + fourk_pager_index_num > 4) {
3352                 fourk_pager_index_num = 4 - fourk_pager_index_start;
3353         }
3354         for (cur_idx = 0;
3355              cur_idx < fourk_pager_index_num;
3356              cur_idx++) {
3357                 vm_object_t             old_object;
3358                 vm_object_offset_t      old_offset;
3359
3360                 kr = fourk_pager_populate(fourk_mem_obj,
3361                                           TRUE, /* overwrite */
3362                                           fourk_pager_index_start + cur_idx,
3363                                           object,
3364                                           (object
3365                                            ? (offset +
3366                                               (cur_idx * FOURK_PAGE_SIZE))
3367                                            : 0),
3368                                           &old_object,
3369                                           &old_offset);
3370 #if VM_MAP_DEBUG_FOURK
3371                 if (vm_map_debug_fourk) {
3372                         if (old_object == (vm_object_t) -1 &&
3373                             old_offset == (vm_object_offset_t) -1) {
3374                                 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3375                                        "pager [%p:0x%llx] "
3376                                        "populate[%d] "
3377                                        "[object:%p,offset:0x%llx]\n",
3378                                        map,
3379                                        (uint64_t) entry->vme_start,
3380                                        (uint64_t) entry->vme_end,
3381                                        fourk_mem_obj,
3382                                        VME_OFFSET(entry),
3383                                        fourk_pager_index_start + cur_idx,
3384                                        object,
3385                                        (object
3386                                         ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3387                                         : 0));
3388                         } else {
3389                                 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3390                                        "pager [%p:0x%llx] "
3391                                        "populate[%d] [object:%p,offset:0x%llx] "
3392                                        "old [%p:0x%llx]\n",
3393                                        map,
3394                                        (uint64_t) entry->vme_start,
3395                                        (uint64_t) entry->vme_end,
3396                                        fourk_mem_obj,
3397                                        VME_OFFSET(entry),
3398                                        fourk_pager_index_start + cur_idx,
3399                                        object,
3400                                        (object
3401                                         ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3402                                         : 0),
3403                                        old_object,
3404                                        old_offset);
3405                         }
3406                 }
3407 #endif /* VM_MAP_DEBUG_FOURK */
3408
3409                 assert(kr == KERN_SUCCESS);
3410                 if (object != old_object &&
3411                     object != VM_OBJECT_NULL &&
3412                     object != (vm_object_t) -1) {
3413                         vm_object_reference(object);
3414                 }
3415                 if (object != old_object &&
3416                     old_object != VM_OBJECT_NULL &&
3417                     old_object != (vm_object_t) -1) {
3418                         vm_object_deallocate(old_object);
3419                 }
3420         }
3421
3422 BailOut:
3423         assert(map_locked == TRUE);
3424
3425         if (fourk_object != VM_OBJECT_NULL) {
3426                 vm_object_deallocate(fourk_object);
3427                 fourk_object = VM_OBJECT_NULL;
3428                 fourk_mem_obj = MEMORY_OBJECT_NULL;
3429         }
3430
3431         if (result == KERN_SUCCESS) {
3432                 vm_prot_t pager_prot;
3433                 memory_object_t pager;
3434
3435 #if DEBUG
3436                 if (pmap_empty &&
3437                     !(vmk_flags.vmkf_no_pmap_check)) {
3438                         assert(vm_map_pmap_is_empty(map,
3439                                                     *address,
3440                                                     *address+size));
3441                 }
3442 #endif /* DEBUG */
3443
3444                 /*
3445                  * For "named" VM objects, let the pager know that the
3446                  * memory object is being mapped.  Some pagers need to keep
3447                  * track of this, to know when they can reclaim the memory
3448                  * object, for example.
3449                  * VM calls memory_object_map() for each mapping (specifying
3450                  * the protection of each mapping) and calls
3451                  * memory_object_last_unmap() when all the mappings are gone.
3452                  */
3453                 pager_prot = max_protection;
3454                 if (needs_copy) {
3455                         /*
3456                          * Copy-On-Write mapping: won't modify
3457                          * the memory object.
3458                          */
3459                         pager_prot &= ~VM_PROT_WRITE;
3460                 }
3461                 if (!is_submap &&
3462                     object != VM_OBJECT_NULL &&
3463                     object->named &&
3464                     object->pager != MEMORY_OBJECT_NULL) {
3465                         vm_object_lock(object);
3466                         pager = object->pager;
3467                         if (object->named &&
3468                             pager != MEMORY_OBJECT_NULL) {
3469                                 assert(object->pager_ready);
3470                                 vm_object_mapping_wait(object, THREAD_UNINT);
3471                                 vm_object_mapping_begin(object);
3472                                 vm_object_unlock(object);
3473
3474                                 kr = memory_object_map(pager, pager_prot);
3475                                 assert(kr == KERN_SUCCESS);
3476
3477                                 vm_object_lock(object);
3478                                 vm_object_mapping_end(object);
3479                         }
3480                         vm_object_unlock(object);
3481                 }
3482                 if (!is_submap &&
3483                     fourk_object != VM_OBJECT_NULL &&
3484                     fourk_object->named &&
3485                     fourk_object->pager != MEMORY_OBJECT_NULL) {
3486                         vm_object_lock(fourk_object);
3487                         pager = fourk_object->pager;
3488                         if (fourk_object->named &&
3489                             pager != MEMORY_OBJECT_NULL) {
3490                                 assert(fourk_object->pager_ready);
3491                                 vm_object_mapping_wait(fourk_object,
3492                                                        THREAD_UNINT);
3493                                 vm_object_mapping_begin(fourk_object);
3494                                 vm_object_unlock(fourk_object);
3495
3496                                 kr = memory_object_map(pager, VM_PROT_READ);
3497                                 assert(kr == KERN_SUCCESS);
3498
3499                                 vm_object_lock(fourk_object);
3500                                 vm_object_mapping_end(fourk_object);
3501                         }
3502                         vm_object_unlock(fourk_object);
3503                 }
3504         }
3505
3506         assert(map_locked == TRUE);
3507
3508         if (!keep_map_locked) {
3509                 vm_map_unlock(map);
3510                 map_locked = FALSE;
3511         }
3512
3513         /*
3514          * We can't hold the map lock if we enter this block.
3515          */
3516
3517         if (result == KERN_SUCCESS) {
3518
3519                 /*      Wire down the new entry if the user
3520                  *      requested all new map entries be wired.
3521                  */
3522                 if ((map->wiring_required)||(superpage_size)) {
3523                         assert(!keep_map_locked);
3524                         pmap_empty = FALSE; /* pmap won't be empty */
3525                         kr = vm_map_wire_kernel(map, start, end,
3526                                              new_entry->protection, VM_KERN_MEMORY_MLOCK,
3527                                              TRUE);
3528                         result = kr;
3529                 }
3530
3531         }
3532
3533         if (result != KERN_SUCCESS) {
3534                 if (new_mapping_established) {
3535                         /*
3536                          * We have to get rid of the new mappings since we
3537                          * won't make them available to the user.
3538                          * Try to do that atomically, to minimize the risk
3539                          * that someone else creates new mappings in that range.
3540                          */
3541                         zap_new_map = vm_map_create(PMAP_NULL,
3542                                                     *address,
3543                                                     *address + size,
3544                                                     map->hdr.entries_pageable);
3545                         vm_map_set_page_shift(zap_new_map,
3546                                               VM_MAP_PAGE_SHIFT(map));
3547                         vm_map_disable_hole_optimization(zap_new_map);
3548
3549                         if (!map_locked) {
3550                                 vm_map_lock(map);
3551                                 map_locked = TRUE;
3552                         }
3553                         (void) vm_map_delete(map, *address, *address+size,
3554                                              (VM_MAP_REMOVE_SAVE_ENTRIES |
3555                                               VM_MAP_REMOVE_NO_MAP_ALIGN),
3556                                              zap_new_map);
3557                 }
3558                 if (zap_old_map != VM_MAP_NULL &&
3559                     zap_old_map->hdr.nentries != 0) {
3560                         vm_map_entry_t  entry1, entry2;
3561
3562                         /*
3563                          * The new mapping failed.  Attempt to restore
3564                          * the old mappings, saved in the "zap_old_map".
3565                          */
3566                         if (!map_locked) {
3567                                 vm_map_lock(map);
3568                                 map_locked = TRUE;
3569                         }
3570
3571                         /* first check if the coast is still clear */
3572                         start = vm_map_first_entry(zap_old_map)->vme_start;
3573                         end = vm_map_last_entry(zap_old_map)->vme_end;
3574                         if (vm_map_lookup_entry(map, start, &entry1) ||
3575                             vm_map_lookup_entry(map, end, &entry2) ||
3576                             entry1 != entry2) {
3577                                 /*
3578                                  * Part of that range has already been
3579                                  * re-mapped:  we can't restore the old
3580                                  * mappings...
3581                                  */
3582                                 vm_map_enter_restore_failures++;
3583                         } else {
3584                                 /*
3585                                  * Transfer the saved map entries from
3586                                  * "zap_old_map" to the original "map",
3587                                  * inserting them all after "entry1".
3588                                  */
3589                                 for (entry2 = vm_map_first_entry(zap_old_map);
3590                                      entry2 != vm_map_to_entry(zap_old_map);
3591                                      entry2 = vm_map_first_entry(zap_old_map)) {
3592                                         vm_map_size_t entry_size;
3593
3594                                         entry_size = (entry2->vme_end -
3595                                                       entry2->vme_start);
3596                                         vm_map_store_entry_unlink(zap_old_map,
3597                                                             entry2);
3598                                         zap_old_map->size -= entry_size;
3599                                         vm_map_store_entry_link(map, entry1, entry2);
3600                                         map->size += entry_size;
3601                                         entry1 = entry2;
3602                                 }
3603                                 if (map->wiring_required) {
3604                                         /*
3605                                          * XXX TODO: we should rewire the
3606                                          * old pages here...
3607                                          */
3608                                 }
3609                                 vm_map_enter_restore_successes++;
3610                         }
3611                 }
3612         }
3613
3614         /*
3615          * The caller is responsible for releasing the lock if it requested to
3616          * keep the map locked.
3617          */
3618         if (map_locked && !keep_map_locked) {
3619                 vm_map_unlock(map);
3620         }
3621
3622         /*
3623          * Get rid of the "zap_maps" and all the map entries that
3624          * they may still contain.
3625          */
3626         if (zap_old_map != VM_MAP_NULL) {
3627                 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3628                 zap_old_map = VM_MAP_NULL;
3629         }
3630         if (zap_new_map != VM_MAP_NULL) {
3631                 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3632                 zap_new_map = VM_MAP_NULL;
3633         }
3634
3635         return result;
3636
3637 #undef  RETURN
3638 }
3639 #endif /* __arm64__ */
3640
3641 /*
3642  * Counters for the prefault optimization: signed 64-bit globals (not static), zero-initialized.
3643  */
3644 int64_t vm_prefault_nb_pages = 0;       /* presumably the number of pages prefaulted -- TODO confirm at the update sites */
3645 int64_t vm_prefault_nb_bailout = 0;     /* presumably the number of prefault bail-outs -- TODO confirm at the update sites */
3646
3647 static kern_return_t
3648 vm_map_enter_mem_object_helper(
3649         vm_map_t                target_map,
3650         vm_map_offset_t         *address,
3651         vm_map_size_t           initial_size,
3652         vm_map_offset_t         mask,
3653         int                     flags,
3654         vm_map_kernel_flags_t   vmk_flags,
3655         vm_tag_t                tag,
3656         ipc_port_t              port,
3657         vm_object_offset_t      offset,
3658         boolean_t               copy,
3659         vm_prot_t               cur_protection,
3660         vm_prot_t               max_protection,
3661         vm_inherit_t            inheritance,
3662         upl_page_list_ptr_t     page_list,
3663         unsigned int            page_list_count)
3664 {
3665         vm_map_address_t        map_addr;
3666         vm_map_size_t           map_size;
3667         vm_object_t             object;
3668         vm_object_size_t        size;
3669         kern_return_t           result;
3670         boolean_t               mask_cur_protection, mask_max_protection;
3671         boolean_t               kernel_prefault, try_prefault = (page_list_count != 0);
3672         vm_map_offset_t         offset_in_mapping = 0;
3673 #if __arm64__
3674         boolean_t               fourk = vmk_flags.vmkf_fourk;
3675 #endif /* __arm64__ */
3676
3677         assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
3678
3679         mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
3680         mask_max_protection = max_protection & VM_PROT_IS_MASK;
3681         cur_protection &= ~VM_PROT_IS_MASK;
3682         max_protection &= ~VM_PROT_IS_MASK;
3683
3684         /*
3685          * Check arguments for validity
3686          */
3687         if ((target_map == VM_MAP_NULL) ||
3688             (cur_protection & ~VM_PROT_ALL) ||
3689             (max_protection & ~VM_PROT_ALL) ||
3690             (inheritance > VM_INHERIT_LAST_VALID) ||
3691             (try_prefault && (copy || !page_list)) ||
3692             initial_size == 0) {
3693                 return KERN_INVALID_ARGUMENT;
3694         }
3695
3696 #if __arm64__
3697         if (fourk) {
3698                 map_addr = vm_map_trunc_page(*address, FOURK_PAGE_MASK);
3699                 map_size = vm_map_round_page(initial_size, FOURK_PAGE_MASK);
3700         } else
3701 #endif /* __arm64__ */
3702         {
3703                 map_addr = vm_map_trunc_page(*address,
3704                                              VM_MAP_PAGE_MASK(target_map));
3705                 map_size = vm_map_round_page(initial_size,
3706                                              VM_MAP_PAGE_MASK(target_map));
3707         }
3708         size = vm_object_round_page(initial_size);
3709
3710         /*
3711          * Find the vm object (if any) corresponding to this port.
3712          */
3713         if (!IP_VALID(port)) {
3714                 object = VM_OBJECT_NULL;
3715                 offset = 0;
3716                 copy = FALSE;
3717         } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
3718                 vm_named_entry_t        named_entry;
3719
3720                 named_entry = (vm_named_entry_t) port->ip_kobject;
3721
3722                 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3723                              VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3724                         offset += named_entry->data_offset;
3725                 }
3726
3727                 /* a few checks to make sure user is obeying rules */
3728                 if (size == 0) {
3729                         if (offset >= named_entry->size)
3730                                 return KERN_INVALID_RIGHT;
3731                         size = named_entry->size - offset;
3732                 }
3733                 if (mask_max_protection) {
3734                         max_protection &= named_entry->protection;
3735                 }
3736                 if (mask_cur_protection) {
3737                         cur_protection &= named_entry->protection;
3738                 }
3739                 if ((named_entry->protection & max_protection) !=
3740                     max_protection)
3741                         return KERN_INVALID_RIGHT;
3742                 if ((named_entry->protection & cur_protection) !=
3743                     cur_protection)
3744                         return KERN_INVALID_RIGHT;
3745                 if (offset + size < offset) {
3746                         /* overflow */
3747                         return KERN_INVALID_ARGUMENT;
3748                 }
3749                 if (named_entry->size < (offset + initial_size)) {
3750                         return KERN_INVALID_ARGUMENT;
3751                 }
3752
3753                 if (named_entry->is_copy) {
3754                         /* for a vm_map_copy, we can only map it whole */
3755                         if ((size != named_entry->size) &&
3756                             (vm_map_round_page(size,
3757                                                VM_MAP_PAGE_MASK(target_map)) ==
3758                              named_entry->size)) {
3759                                 /* XXX FBDP use the rounded size... */
3760                                 size = vm_map_round_page(
3761                                         size,
3762                                         VM_MAP_PAGE_MASK(target_map));
3763                         }
3764
3765                         if (!(flags & VM_FLAGS_ANYWHERE) &&
3766                             (offset != 0 ||
3767                              size != named_entry->size)) {
3768                                 /*
3769                                  * XXX for a mapping at a "fixed" address,
3770                                  * we can't trim after mapping the whole
3771                                  * memory entry, so reject a request for a
3772                                  * partial mapping.
3773                                  */
3774                                 return KERN_INVALID_ARGUMENT;
3775                         }
3776                 }
3777
3778                 /* the caller's "offset" parameter is defined to be the */
3779                 /* offset from the beginning of the named entry's offset in the object */
3780                 offset = offset + named_entry->offset;
3781
3782                 if (! VM_MAP_PAGE_ALIGNED(size,
3783                                           VM_MAP_PAGE_MASK(target_map))) {
3784                         /*
3785                          * Let's not map more than requested;
3786                          * vm_map_enter() will handle this "not map-aligned"
3787                          * case.
3788                          */
3789                         map_size = size;
3790                 }
3791
3792                 named_entry_lock(named_entry);
3793                 if (named_entry->is_sub_map) {
3794                         vm_map_t                submap;
3795
3796                         if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3797                                      VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3798                                 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
3799                         }
3800
3801                         submap = named_entry->backing.map;
3802                         vm_map_lock(submap);
3803                         vm_map_reference(submap);
3804                         vm_map_unlock(submap);
3805                         named_entry_unlock(named_entry);
3806
3807                         vmk_flags.vmkf_submap = TRUE;
3808
3809                         result = vm_map_enter(target_map,
3810                                               &map_addr,
3811                                               map_size,
3812                                               mask,
3813                                               flags,
3814                                               vmk_flags,
3815                                               tag,
3816                                               (vm_object_t) submap,
3817                                               offset,
3818                                               copy,
3819                                               cur_protection,
3820                                               max_protection,
3821                                               inheritance);
3822                         if (result != KERN_SUCCESS) {
3823                                 vm_map_deallocate(submap);
3824                         } else {
3825                                 /*
3826                                  * No need to lock "submap" just to check its
3827                                  * "mapped" flag: that flag is never reset
3828                                  * once it's been set and if we race, we'll
3829                                  * just end up setting it twice, which is OK.
3830                                  */
3831                                 if (submap->mapped_in_other_pmaps == FALSE &&
3832                                     vm_map_pmap(submap) != PMAP_NULL &&
3833                                     vm_map_pmap(submap) !=
3834                                     vm_map_pmap(target_map)) {
3835                                         /*
3836                                          * This submap is being mapped in a map
3837                                          * that uses a different pmap.
3838                                          * Set its "mapped_in_other_pmaps" flag
3839                                          * to indicate that we now need to
3840                                          * remove mappings from all pmaps rather
3841                                          * than just the submap's pmap.
3842                                          */
3843                                         vm_map_lock(submap);
3844                                         submap->mapped_in_other_pmaps = TRUE;
3845                                         vm_map_unlock(submap);
3846                                 }
3847                                 *address = map_addr;
3848                         }
3849                         return result;
3850
3851                 } else if (named_entry->is_copy) {
3852                         kern_return_t   kr;
3853                         vm_map_copy_t   copy_map;
3854                         vm_map_entry_t  copy_entry;
3855                         vm_map_offset_t copy_addr;
3856
3857                         if (flags & ~(VM_FLAGS_FIXED |
3858                                       VM_FLAGS_ANYWHERE |
3859                                       VM_FLAGS_OVERWRITE |
3860                                       VM_FLAGS_RETURN_4K_DATA_ADDR |
3861                                       VM_FLAGS_RETURN_DATA_ADDR |
3862                                       VM_FLAGS_ALIAS_MASK)) {
3863                                 named_entry_unlock(named_entry);
3864                                 return KERN_INVALID_ARGUMENT;
3865                         }
3866
3867                         if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
3868                                      VM_FLAGS_RETURN_4K_DATA_ADDR)) {
3869                                 offset_in_mapping = offset - vm_object_trunc_page(offset);
3870                                 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR)
3871                                         offset_in_mapping &= ~((signed)(0xFFF));
3872                                 offset = vm_object_trunc_page(offset);
3873                                 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
3874                         }
3875
3876                         copy_map = named_entry->backing.copy;
3877                         assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
3878                         if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
3879                                 /* unsupported type; should not happen */
3880                                 printf("vm_map_enter_mem_object: "
3881                                        "memory_entry->backing.copy "
3882                                        "unsupported type 0x%x\n",
3883                                        copy_map->type);
3884                                 named_entry_unlock(named_entry);
3885                                 return KERN_INVALID_ARGUMENT;
3886                         }
3887
3888                         /* reserve a contiguous range */
3889                         kr = vm_map_enter(target_map,
3890                                           &map_addr,
3891                                           /* map whole mem entry, trim later: */
3892                                           named_entry->size,
3893                                           mask,
3894                                           flags & (VM_FLAGS_ANYWHERE |
3895                                                    VM_FLAGS_OVERWRITE |
3896                                                    VM_FLAGS_RETURN_4K_DATA_ADDR |
3897                                                    VM_FLAGS_RETURN_DATA_ADDR),
3898                                           vmk_flags,
3899                                           tag,
3900                                           VM_OBJECT_NULL,
3901                                           0,
3902                                           FALSE, /* copy */
3903                                           cur_protection,
3904                                           max_protection,
3905                                           inheritance);
3906                         if (kr != KERN_SUCCESS) {
3907                                 named_entry_unlock(named_entry);
3908                                 return kr;
3909                         }
3910
3911                         copy_addr = map_addr;
3912
3913                         for (copy_entry = vm_map_copy_first_entry(copy_map);
3914                              copy_entry != vm_map_copy_to_entry(copy_map);
3915                              copy_entry = copy_entry->vme_next) {
3916                                 int                     remap_flags;
3917                                 vm_map_kernel_flags_t   vmk_remap_flags;
3918                                 vm_map_t                copy_submap;
3919                                 vm_object_t             copy_object;
3920                                 vm_map_size_t           copy_size;
3921                                 vm_object_offset_t      copy_offset;
3922                                 int                     copy_vm_alias;
3923
3924                                 remap_flags = 0;
3925                                 vmk_remap_flags = VM_MAP_KERNEL_FLAGS_NONE;
3926
3927                                 copy_object = VME_OBJECT(copy_entry);
3928                                 copy_offset = VME_OFFSET(copy_entry);
3929                                 copy_size = (copy_entry->vme_end -
3930                                              copy_entry->vme_start);
3931                                 VM_GET_FLAGS_ALIAS(flags, copy_vm_alias);
3932                                 if (copy_vm_alias == 0) {
3933                                         /*
3934                                          * Caller does not want a specific
3935                                          * alias for this new mapping:  use
3936                                          * the alias of the original mapping.
3937                                          */
3938                                         copy_vm_alias = VME_ALIAS(copy_entry);
3939                                 }
3940
3941                                 /* sanity check */
3942                                 if ((copy_addr + copy_size) >
3943                                     (map_addr +
3944                                      named_entry->size /* XXX full size */ )) {
3945                                         /* over-mapping too much !? */
3946                                         kr = KERN_INVALID_ARGUMENT;
3947                                         /* abort */
3948                                         break;
3949                                 }
3950
3951                                 /* take a reference on the object */
3952                                 if (copy_entry->is_sub_map) {
3953                                         vmk_remap_flags.vmkf_submap = TRUE;
3954                                         copy_submap = VME_SUBMAP(copy_entry);
3955                                         vm_map_lock(copy_submap);
3956                                         vm_map_reference(copy_submap);
3957                                         vm_map_unlock(copy_submap);
3958                                         copy_object = (vm_object_t) copy_submap;
3959                                 } else if (!copy &&
3960                                            copy_object != VM_OBJECT_NULL &&
3961                                            (copy_entry->needs_copy ||
3962                                             copy_object->shadowed ||
3963                                             (!copy_object->true_share &&
3964                                              !copy_entry->is_shared &&
3965                                              copy_object->vo_size > copy_size))) {
3966                                         /*
3967                                          * We need to resolve our side of this
3968                                          * "symmetric" copy-on-write now; we
3969                                          * need a new object to map and share,
3970                                          * instead of the current one which
3971                                          * might still be shared with the
3972                                          * original mapping.
3973                                          *
3974                                          * Note: A "vm_map_copy_t" does not
3975                                          * have a lock but we're protected by
3976                                          * the named entry's lock here.
3977                                          */
3978                                         // assert(copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
3979                                         VME_OBJECT_SHADOW(copy_entry, copy_size);
3980                                         if (!copy_entry->needs_copy &&
3981                                             copy_entry->protection & VM_PROT_WRITE) {
3982                                                 vm_prot_t prot;
3983
3984                                                 prot = copy_entry->protection & ~VM_PROT_WRITE;
3985                                                 vm_object_pmap_protect(copy_object,
3986                                                                        copy_offset,
3987                                                                        copy_size,
3988                                                                        PMAP_NULL,
3989                                                                        0,
3990                                                                        prot);
3991                                         }
3992
3993                                         copy_entry->needs_copy = FALSE;
3994                                         copy_entry->is_shared = TRUE;
3995                                         copy_object = VME_OBJECT(copy_entry);
3996                                         copy_offset = VME_OFFSET(copy_entry);
3997                                         vm_object_lock(copy_object);
3998                                         vm_object_reference_locked(copy_object);
3999                                         if (copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
4000                                                 /* we're about to make a shared mapping of this object */
4001                                                 copy_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
4002                                                 copy_object->true_share = TRUE;
4003                                         }
4004                                         vm_object_unlock(copy_object);
4005                                 } else {
4006                                         /*
4007                                          * We already have the right object
4008                                          * to map.
4009                                          */
4010                                         copy_object = VME_OBJECT(copy_entry);
4011                                         vm_object_reference(copy_object);
4012                                 }
4013
4014                                 /* over-map the object into destination */
4015                                 remap_flags |= flags;
4016                                 remap_flags |= VM_FLAGS_FIXED;
4017                                 remap_flags |= VM_FLAGS_OVERWRITE;
4018                                 remap_flags &= ~VM_FLAGS_ANYWHERE;
4019                                 if (!copy && !copy_entry->is_sub_map) {
4020                                         /*
4021                                          * copy-on-write should have been
4022                                          * resolved at this point, or we would
4023                                          * end up sharing instead of copying.
4024                                          */
4025                                         assert(!copy_entry->needs_copy);
4026                                 }
4027                                 kr = vm_map_enter(target_map,
4028                                                   &copy_addr,
4029                                                   copy_size,
4030                                                   (vm_map_offset_t) 0,
4031                                                   remap_flags,
4032                                                   vmk_remap_flags,
4033                                                   copy_vm_alias,
4034                                                   copy_object,
4035                                                   copy_offset,
4036                                                   copy,
4037                                                   cur_protection,
4038                                                   max_protection,
4039                                                   inheritance);
4040                                 if (kr != KERN_SUCCESS) {
4041                                         if (copy_entry->is_sub_map) {
4042                                                 vm_map_deallocate(copy_submap);
4043                                         } else {
4044                                                 vm_object_deallocate(copy_object);
4045                                         }
4046                                         /* abort */
4047                                         break;
4048                                 }
4049
4050                                 /* next mapping */
4051                                 copy_addr += copy_size;
4052                         }
4053
4054                         if (kr == KERN_SUCCESS) {
4055                                 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4056                                              VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4057                                         *address = map_addr + offset_in_mapping;
4058                                 } else {
4059                                         *address = map_addr;
4060                                 }
4061
4062                                 if (offset) {
4063                                         /*
4064                                          * Trim in front, from 0 to "offset".
4065                                          */
4066                                         vm_map_remove(target_map,
4067                                                       map_addr,
4068                                                       map_addr + offset,
4069                                                       0);
4070                                         *address += offset;
4071                                 }
4072                                 if (offset + map_size < named_entry->size) {
4073                                         /*
4074                                          * Trim in back, from
4075                                          * "offset + map_size" to
4076                                          * "named_entry->size".
4077                                          */
4078                                         vm_map_remove(target_map,
4079                                                       (map_addr +
4080                                                        offset + map_size),
4081                                                       (map_addr +
4082                                                        named_entry->size),
4083                                                       0);
4084                                 }
4085                         }
4086                         named_entry_unlock(named_entry);
4087
4088                         if (kr != KERN_SUCCESS) {
4089                                 if (! (flags & VM_FLAGS_OVERWRITE)) {
4090                                         /* deallocate the contiguous range */
4091                                         (void) vm_deallocate(target_map,
4092                                                              map_addr,
4093                                                              map_size);
4094                                 }
4095                         }
4096
4097                         return kr;
4098
4099                 } else {
4100                         unsigned int    access;
4101                         vm_prot_t       protections;
4102                         unsigned int    wimg_mode;
4103
4104                         /* we are mapping a VM object */
4105
4106                         protections = named_entry->protection & VM_PROT_ALL;
4107                         access = GET_MAP_MEM(named_entry->protection);
4108
4109                         if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4110                                      VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4111                                 offset_in_mapping = offset - vm_object_trunc_page(offset);
4112                                 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR)
4113                                         offset_in_mapping &= ~((signed)(0xFFF));
4114                                 offset = vm_object_trunc_page(offset);
4115                                 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset;
4116                         }
4117
4118                         object = named_entry->backing.object;
4119                         assert(object != VM_OBJECT_NULL);
4120                         vm_object_lock(object);
4121                         named_entry_unlock(named_entry);
4122
4123                         vm_object_reference_locked(object);
4124
4125                         wimg_mode = object->wimg_bits;
4126                         vm_prot_to_wimg(access, &wimg_mode);
4127                         if (object->wimg_bits != wimg_mode)
4128                                 vm_object_change_wimg_mode(object, wimg_mode);
4129
4130                         vm_object_unlock(object);
4131                 }
4132         } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
4133                 /*
4134                  * JMM - This is temporary until we unify named entries
4135                  * and raw memory objects.
4136                  *
4137                  * Detected fake ip_kotype for a memory object.  In
4138                  * this case, the port isn't really a port at all, but
4139                  * instead is just a raw memory object.
4140                  */
4141                 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4142                              VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4143                         panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
4144                 }
4145
4146                 object = memory_object_to_vm_object((memory_object_t)port);
4147                 if (object == VM_OBJECT_NULL)
4148                         return KERN_INVALID_OBJECT;
4149                 vm_object_reference(object);
4150
4151                 /* wait for object (if any) to be ready */
4152                 if (object != VM_OBJECT_NULL) {
4153                         if (object == kernel_object) {
4154                                 printf("Warning: Attempt to map kernel object"
4155                                         " by a non-private kernel entity\n");
4156                                 return KERN_INVALID_OBJECT;
4157                         }
4158                         if (!object->pager_ready) {
4159                                 vm_object_lock(object);
4160
4161                                 while (!object->pager_ready) {
4162                                         vm_object_wait(object,
4163                                                        VM_OBJECT_EVENT_PAGER_READY,
4164                                                        THREAD_UNINT);
4165                                         vm_object_lock(object);
4166                                 }
4167                                 vm_object_unlock(object);
4168                         }
4169                 }
4170         } else {
4171                 return KERN_INVALID_OBJECT;
4172         }
4173
4174         if (object != VM_OBJECT_NULL &&
4175             object->named &&
4176             object->pager != MEMORY_OBJECT_NULL &&
4177             object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4178                 memory_object_t pager;
4179                 vm_prot_t       pager_prot;
4180                 kern_return_t   kr;
4181
4182                 /*
4183                  * For "named" VM objects, let the pager know that the
4184                  * memory object is being mapped.  Some pagers need to keep
4185                  * track of this, to know when they can reclaim the memory
4186                  * object, for example.
4187                  * VM calls memory_object_map() for each mapping (specifying
4188                  * the protection of each mapping) and calls
4189                  * memory_object_last_unmap() when all the mappings are gone.
4190                  */
4191                 pager_prot = max_protection;
4192                 if (copy) {
4193                         /*
4194                          * Copy-On-Write mapping: won't modify the
4195                          * memory object.
4196                          */
4197                         pager_prot &= ~VM_PROT_WRITE;
4198                 }
4199                 vm_object_lock(object);
4200                 pager = object->pager;
4201                 if (object->named &&
4202                     pager != MEMORY_OBJECT_NULL &&
4203                     object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4204                         assert(object->pager_ready);
4205                         vm_object_mapping_wait(object, THREAD_UNINT);
4206                         vm_object_mapping_begin(object);
4207                         vm_object_unlock(object);
4208
4209                         kr = memory_object_map(pager, pager_prot);
4210                         assert(kr == KERN_SUCCESS);
4211
4212                         vm_object_lock(object);
4213                         vm_object_mapping_end(object);
4214                 }
4215                 vm_object_unlock(object);
4216         }
4217
4218         /*
4219          *      Perform the copy if requested
4220          */
4221
4222         if (copy) {
4223                 vm_object_t             new_object;
4224                 vm_object_offset_t      new_offset;
4225
4226                 result = vm_object_copy_strategically(object, offset,
4227                                                       map_size,
4228                                                       &new_object, &new_offset,
4229                                                       &copy);
4230
4231
4232                 if (result == KERN_MEMORY_RESTART_COPY) {
4233                         boolean_t success;
4234                         boolean_t src_needs_copy;
4235
4236                         /*
4237                          * XXX
4238                          * We currently ignore src_needs_copy.
4239                          * This really is the issue of how to make
4240                          * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4241                          * non-kernel users to use. Solution forthcoming.
4242                          * In the meantime, since we don't allow non-kernel
4243                          * memory managers to specify symmetric copy,
4244                          * we won't run into problems here.
4245                          */
4246                         new_object = object;
4247                         new_offset = offset;
4248                         success = vm_object_copy_quickly(&new_object,
4249                                                          new_offset,
4250                                                          map_size,
4251                                                          &src_needs_copy,
4252                                                          &copy);
4253                         assert(success);
4254                         result = KERN_SUCCESS;
4255                 }
4256                 /*
4257                  *      Throw away the reference to the
4258                  *      original object, as it won't be mapped.
4259                  */
4260
4261                 vm_object_deallocate(object);
4262
4263                 if (result != KERN_SUCCESS) {
4264                         return result;
4265                 }
4266
4267                 object = new_object;
4268                 offset = new_offset;
4269         }
4270
4271         /*
4272          * If non-kernel users want to try to prefault pages, the mapping and prefault
4273          * needs to be atomic.
4274          */
4275         kernel_prefault = (try_prefault && vm_kernel_map_is_kernel(target_map));
4276         vmk_flags.vmkf_keep_map_locked = (try_prefault && !kernel_prefault);
4277
4278 #if __arm64__
4279         if (fourk) {
4280                 /* map this object in a "4K" pager */
4281                 result = vm_map_enter_fourk(target_map,
4282                                             &map_addr,
4283                                             map_size,
4284                                             (vm_map_offset_t) mask,
4285                                             flags,
4286                                             vmk_flags,
4287                                             tag,
4288                                             object,
4289                                             offset,
4290                                             copy,
4291                                             cur_protection,
4292                                             max_protection,
4293                                             inheritance);
4294         } else
4295 #endif /* __arm64__ */
4296         {
4297                 result = vm_map_enter(target_map,
4298                                       &map_addr, map_size,
4299                                       (vm_map_offset_t)mask,
4300                                       flags,
4301                                       vmk_flags,
4302                                       tag,
4303                                       object, offset,
4304                                       copy,
4305                                       cur_protection, max_protection,
4306                                       inheritance);
4307         }
4308         if (result != KERN_SUCCESS)
4309                 vm_object_deallocate(object);
4310
4311         /*
4312          * Try to prefault, and do not forget to release the vm map lock.
4313          */
4314         if (result == KERN_SUCCESS && try_prefault) {
4315                 mach_vm_address_t va = map_addr;
4316                 kern_return_t kr = KERN_SUCCESS;
4317                 unsigned int i = 0;
4318                 int pmap_options;
4319
4320                 pmap_options = kernel_prefault ? 0 : PMAP_OPTIONS_NOWAIT;
4321                 if (object->internal) {
4322                         pmap_options |= PMAP_OPTIONS_INTERNAL;
4323                 }
4324
4325                 for (i = 0; i < page_list_count; ++i) {
4326                         if (!UPL_VALID_PAGE(page_list, i)) {
4327                                 if (kernel_prefault) {
4328                                         assertf(FALSE, "kernel_prefault && !UPL_VALID_PAGE");
4329                                         result = KERN_MEMORY_ERROR;
4330                                         break;
4331                                 }
4332                         } else {
4333                                 /*
4334                                  * If this function call failed, we should stop
4335                                  * trying to optimize, other calls are likely
4336                                  * going to fail too.
4337                                  *
4338                                  * We are not gonna report an error for such
4339                                  * failure though. That's an optimization, not
4340                                  * something critical.
4341                                  */
4342                                 kr = pmap_enter_options(target_map->pmap,
4343                                                         va, UPL_PHYS_PAGE(page_list, i),
4344                                                         cur_protection, VM_PROT_NONE,
4345                                                         0, TRUE, pmap_options, NULL);
4346                                 if (kr != KERN_SUCCESS) {
4347                                         OSIncrementAtomic64(&vm_prefault_nb_bailout);
4348                                         if (kernel_prefault) {
4349                                                 result = kr;
4350                                         }
4351                                         break;
4352                                 }
4353                                 OSIncrementAtomic64(&vm_prefault_nb_pages);
4354                         }
4355
4356                         /* Next virtual address */
4357                         va += PAGE_SIZE;
4358                 }
4359                 if (vmk_flags.vmkf_keep_map_locked) {
4360                         vm_map_unlock(target_map);
4361                 }
4362         }
4363
4364         if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4365                      VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4366                 *address = map_addr + offset_in_mapping;
4367         } else {
4368                 *address = map_addr;
4369         }
4370         return result;
4371 }
4372
/*
 *      vm_map_enter_mem_object:
 *
 *      Wrapper around vm_map_enter_mem_object_helper().  Every argument is
 *      forwarded unchanged; the helper's two trailing arguments (the
 *      positions that vm_map_enter_mem_object_prefault() fills with its
 *      page list and page count) are passed as NULL and 0.
 *
 *      On KASAN builds, a successful mapping into a map whose pmap is
 *      kernel_pmap is reported through kasan_notify_address().
 *
 *      Returns the helper's kern_return_t unchanged.
 */
4373 kern_return_t
4374 vm_map_enter_mem_object(
4375         vm_map_t                target_map,
4376         vm_map_offset_t         *address,
4377         vm_map_size_t           initial_size,
4378         vm_map_offset_t         mask,
4379         int                     flags,
4380         vm_map_kernel_flags_t   vmk_flags,
4381         vm_tag_t                tag,
4382         ipc_port_t              port,
4383         vm_object_offset_t      offset,
4384         boolean_t               copy,
4385         vm_prot_t               cur_protection,
4386         vm_prot_t               max_protection,
4387         vm_inherit_t            inheritance)
4388 {
4389         kern_return_t ret;
4390
             /*
              * NOTE(review): with no page list the helper presumably skips
              * its prefault step -- confirm against the helper's prologue.
              */
4391         ret = vm_map_enter_mem_object_helper(target_map,
4392                                              address,
4393                                              initial_size,
4394                                              mask,
4395                                              flags,
4396                                              vmk_flags,
4397                                              tag,
4398                                              port,
4399                                              offset,
4400                                              copy,
4401                                              cur_protection,
4402                                              max_protection,
4403                                              inheritance,
4404                                              NULL,      /* no page list */
4405                                              0);        /* no page list entries */
4406
4407 #if KASAN
             /*
              * Tell KASAN about the new range, but only for a successful
              * mapping in a map that uses kernel_pmap.  The range reported
              * is the address stored through "address" plus the caller's
              * original (unrounded) initial_size.
              */
4408         if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4409                 kasan_notify_address(*address, initial_size);
4410         }
4411 #endif
4412
4413         return ret;
4414 }
4415
/*
 *      vm_map_enter_mem_object_prefault:
 *
 *      Wrapper around vm_map_enter_mem_object_helper() that additionally
 *      hands the helper a page list ("page_list", "page_list_count").
 *
 *      Unlike vm_map_enter_mem_object(), this entry point has no "copy" or
 *      "inheritance" parameter: the helper is always called with
 *      copy == FALSE and inheritance == VM_INHERIT_DEFAULT.
 *
 *      On KASAN builds, a successful mapping into a map whose pmap is
 *      kernel_pmap is reported through kasan_notify_address().
 *
 *      Returns the helper's kern_return_t unchanged.
 */
4416 kern_return_t
4417 vm_map_enter_mem_object_prefault(
4418         vm_map_t                target_map,
4419         vm_map_offset_t         *address,
4420         vm_map_size_t           initial_size,
4421         vm_map_offset_t         mask,
4422         int                     flags,
4423         vm_map_kernel_flags_t   vmk_flags,
4424         vm_tag_t                tag,
4425         ipc_port_t              port,
4426         vm_object_offset_t      offset,
4427         vm_prot_t               cur_protection,
4428         vm_prot_t               max_protection,
4429         upl_page_list_ptr_t     page_list,
4430         unsigned int            page_list_count)
4431 {
4432         kern_return_t ret;
4433
             /*
              * NOTE(review): supplying a page list presumably enables the
              * helper's prefault loop (the one that walks page_list_count
              * entries and calls pmap_enter_options()) -- confirm against
              * the helper's prologue.
              */
4434         ret = vm_map_enter_mem_object_helper(target_map,
4435                                              address,
4436                                              initial_size,
4437                                              mask,
4438                                              flags,
4439                                              vmk_flags,
4440                                              tag,
4441                                              port,
4442                                              offset,
4443                                              FALSE,     /* copy: never requested here */
4444                                              cur_protection,
4445                                              max_protection,
4446                                              VM_INHERIT_DEFAULT,
4447                                              page_list,
4448                                              page_list_count);
4449
4450 #if KASAN
             /*
              * Tell KASAN about the new range, but only for a successful
              * mapping in a map that uses kernel_pmap.
              */
4451         if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
4452                 kasan_notify_address(*address, initial_size);
4453         }
4454 #endif
4455
4456         return ret;
4457 }
4458
4459
/*
 *      vm_map_enter_mem_object_control:
 *
 *      Map the VM object obtained from a memory object control
 *      ("control") into "target_map".
 *
 *      Steps, in order:
 *        - validate the arguments;
 *        - page-align the requested address and size;
 *        - look up the VM object and take a reference on it;
 *        - for a "named" object with a pager, notify the pager through
 *          memory_object_map();
 *        - if "copy" is set, replace the object/offset with a copy;
 *        - enter the object in the map with vm_map_enter() (or
 *          vm_map_enter_fourk() on arm64 when vmk_flags.vmkf_fourk is set).
 *
 *      Returns:
 *        KERN_INVALID_ARGUMENT  target_map is VM_MAP_NULL, a protection has
 *                               bits outside VM_PROT_ALL, inheritance is
 *                               above VM_INHERIT_LAST_VALID, or
 *                               initial_size is 0.
 *        KERN_INVALID_OBJECT    control yields no VM object, or yields
 *                               kernel_object.
 *        otherwise              the result of the object copy (when it
 *                               fails) or of vm_map_enter[_fourk]().
 *
 *      "*address" is read as the requested address and is overwritten with
 *      map_addr once the map-enter call has been made, whether or not that
 *      call succeeded.  The early-return paths leave it untouched.
 */
4460 kern_return_t
4461 vm_map_enter_mem_object_control(
4462         vm_map_t                target_map,
4463         vm_map_offset_t         *address,
4464         vm_map_size_t           initial_size,
4465         vm_map_offset_t         mask,
4466         int                     flags,
4467         vm_map_kernel_flags_t   vmk_flags,
4468         vm_tag_t                tag,
4469         memory_object_control_t control,
4470         vm_object_offset_t      offset,
4471         boolean_t               copy,
4472         vm_prot_t               cur_protection,
4473         vm_prot_t               max_protection,
4474         vm_inherit_t            inheritance)
4475 {
4476         vm_map_address_t        map_addr;
4477         vm_map_size_t           map_size;
4478         vm_object_t             object;
4479         vm_object_size_t        size;
4480         kern_return_t           result;
4481         memory_object_t         pager;
4482         vm_prot_t               pager_prot;
4483         kern_return_t           kr;
4484 #if __arm64__
4485         boolean_t               fourk = vmk_flags.vmkf_fourk;
4486 #endif /* __arm64__ */
4487
4488         /*
4489          * Check arguments for validity
4490          */
4491         if ((target_map == VM_MAP_NULL) ||
4492             (cur_protection & ~VM_PROT_ALL) ||
4493             (max_protection & ~VM_PROT_ALL) ||
4494             (inheritance > VM_INHERIT_LAST_VALID) ||
4495             initial_size == 0) {
4496                 return KERN_INVALID_ARGUMENT;
4497         }
4498
             /*
              * Align the requested address down and the requested size up:
              * to FOURK_PAGE_MASK when a "fourk" mapping was asked for
              * (arm64 only), otherwise to the target map's own page mask.
              */
4499 #if __arm64__
4500         if (fourk) {
4501                 map_addr = vm_map_trunc_page(*address,
4502                                              FOURK_PAGE_MASK);
4503                 map_size = vm_map_round_page(initial_size,
4504                                              FOURK_PAGE_MASK);
4505         } else
4506 #endif /* __arm64__ */
4507         {
4508                 map_addr = vm_map_trunc_page(*address,
4509                                              VM_MAP_PAGE_MASK(target_map));
4510                 map_size = vm_map_round_page(initial_size,
4511                                              VM_MAP_PAGE_MASK(target_map));
4512         }
             /*
              * "size" (rounded with vm_object_round_page) is the length
              * used by the object copy below; "map_size" is the length
              * entered in the map.
              */
4513         size = vm_object_round_page(initial_size);
4514
4515         object = memory_object_control_to_vm_object(control);
4516
4517         if (object == VM_OBJECT_NULL)
4518                 return KERN_INVALID_OBJECT;
4519
4520         if (object == kernel_object) {
4521                 printf("Warning: Attempt to map kernel object"
4522                        " by a non-private kernel entity\n");
4523                 return KERN_INVALID_OBJECT;
4524         }
4525
             /*
              * Take a reference on the object while holding its lock.  That
              * reference is released with vm_object_deallocate() on the
              * copy path and when entering the mapping fails.
              */
4526         vm_object_lock(object);
4527         object->ref_count++;
4528         vm_object_res_reference(object);
4529
4530         /*
4531          * For "named" VM objects, let the pager know that the
4532          * memory object is being mapped.  Some pagers need to keep
4533          * track of this, to know when they can reclaim the memory
4534          * object, for example.
4535          * VM calls memory_object_map() for each mapping (specifying
4536          * the protection of each mapping) and calls
4537          * memory_object_last_unmap() when all the mappings are gone.
4538          */
4539         pager_prot = max_protection;
4540         if (copy) {
                     /*
                      * A copy was requested, so VM_PROT_WRITE is masked out
                      * of the protection reported to the pager.
                      */
4541                 pager_prot &= ~VM_PROT_WRITE;
4542         }
4543         pager = object->pager;
4544         if (object->named &&
4545             pager != MEMORY_OBJECT_NULL &&
4546             object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4547                 assert(object->pager_ready);
                     /*
                      * The object lock is dropped around the call into the
                      * pager; vm_object_mapping_begin()/_end() bracket that
                      * unlocked window.
                      */
4548                 vm_object_mapping_wait(object, THREAD_UNINT);
4549                 vm_object_mapping_begin(object);
4550                 vm_object_unlock(object);
4551
                     /* kr is only examined by the assert that follows. */
4552                 kr = memory_object_map(pager, pager_prot);
4553                 assert(kr == KERN_SUCCESS);
4554
4555                 vm_object_lock(object);
4556                 vm_object_mapping_end(object);
4557         }
4558         vm_object_unlock(object);
4559
4560         /*
4561          *      Perform the copy if requested
4562          */
4563
4564         if (copy) {
4565                 vm_object_t             new_object;
4566                 vm_object_offset_t      new_offset;
4567
4568                 result = vm_object_copy_strategically(object, offset, size,
4569                                                       &new_object, &new_offset,
4570                                                       &copy);
4571
4572
                     /*
                      * KERN_MEMORY_RESTART_COPY is handled by retrying with
                      * vm_object_copy_quickly(), which is asserted to
                      * succeed; the result is then forced to KERN_SUCCESS.
                      */
4573                 if (result == KERN_MEMORY_RESTART_COPY) {
4574                         boolean_t success;
4575                         boolean_t src_needs_copy;
4576
4577                         /*
4578                          * XXX
4579                          * We currently ignore src_needs_copy.
4580                          * This really is the issue of how to make
4581                          * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4582                          * non-kernel users to use. Solution forthcoming.
4583                          * In the meantime, since we don't allow non-kernel
4584                          * memory managers to specify symmetric copy,
4585                          * we won't run into problems here.
4586                          */
4587                         new_object = object;
4588                         new_offset = offset;
4589                         success = vm_object_copy_quickly(&new_object,
4590                                                          new_offset, size,
4591                                                          &src_needs_copy,
4592                                                          &copy);
4593                         assert(success);
4594                         result = KERN_SUCCESS;
4595                 }
4596                 /*
4597                  *      Throw away the reference to the
4598                  *      original object, as it won't be mapped.
4599                  */
4600
4601                 vm_object_deallocate(object);
4602
4603                 if (result != KERN_SUCCESS) {
4604                         return result;
4605                 }
4606
                     /* From here on, map the copy instead of the original. */
4607                 object = new_object;
4608                 offset = new_offset;
4609         }
4610
             /*
              * Enter the (possibly copied) object in the map.  "copy" is
              * passed along as left by the copy routines above, which
              * receive its address.
              */
4611 #if __arm64__
4612         if (fourk) {
4613                 result = vm_map_enter_fourk(target_map,
4614                                             &map_addr,
4615                                             map_size,
4616                                             (vm_map_offset_t)mask,
4617                                             flags,
4618                                             vmk_flags,
4619                                             tag,
4620                                             object, offset,
4621                                             copy,
4622                                             cur_protection, max_protection,
4623                                             inheritance);
4624         } else
4625 #endif /* __arm64__ */
4626         {
4627                 result = vm_map_enter(target_map,
4628                                       &map_addr, map_size,
4629                                       (vm_map_offset_t)mask,
4630                                       flags,
4631                                       vmk_flags,
4632                                       tag,
4633                                       object, offset,
4634                                       copy,
4635                                       cur_protection, max_protection,
4636                                       inheritance);
4637         }
             /* On failure, release the object reference held at this point. */
4638         if (result != KERN_SUCCESS)
4639                 vm_object_deallocate(object);
             /* Stored unconditionally, i.e. also when the mapping failed. */
4640         *address = map_addr;
4641
4642         return result;
4643 }
4644
4645
4646 #if     VM_CPM
4647
/*
 * Declared for the assert in vm_map_enter_cpm() that checks each page's
 * physical page number against (avail_start >> PAGE_SHIFT) and
 * (avail_end >> PAGE_SHIFT).
 * NOTE(review): this guard is "#ifdef MACH_ASSERT" while vm_map_enter_cpm()
 * guards its debug-only local with "#if MACH_ASSERT" -- confirm the two
 * are meant to differ.
 */
4648 #ifdef MACH_ASSERT
4649 extern pmap_paddr_t     avail_start, avail_end;
4650 #endif
4651
4652 /*
4653  *      Allocate memory in the specified map, with the caveat that
4654  *      the memory is physically contiguous.  This call may fail
4655  *      if the system can't find sufficient contiguous memory.
4656  *      This call may cause or lead to heart-stopping amounts of
4657  *      paging activity.
4658  *
4659  *      Memory obtained from this call should be freed in the
4660  *      normal way, viz., via vm_deallocate.
4661  */
4662 kern_return_t
4663 vm_map_enter_cpm(
4664         vm_map_t                map,
4665         vm_map_offset_t *addr,
4666         vm_map_size_t           size,
4667         int                     flags)
4668 {
4669         vm_object_t             cpm_obj;
4670         pmap_t                  pmap;
4671         vm_page_t               m, pages;
4672         kern_return_t           kr;
4673         vm_map_offset_t         va, start, end, offset;
4674 #if     MACH_ASSERT
4675         vm_map_offset_t         prev_addr = 0;
4676 #endif  /* MACH_ASSERT */
4677
4678         boolean_t               anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
4679         vm_tag_t tag;
4680
4681         VM_GET_FLAGS_ALIAS(flags, tag);
4682
4683         if (size == 0) {
4684                 *addr = 0;
4685                 return KERN_SUCCESS;
4686         }
4687         if (anywhere)
4688                 *addr = vm_map_min(map);
4689         else
4690                 *addr = vm_map_trunc_page(*addr,
4691                                           VM_MAP_PAGE_MASK(map));
4692         size = vm_map_round_page(size,
4693                                  VM_MAP_PAGE_MASK(map));
4694
4695         /*
4696          * LP64todo - cpm_allocate should probably allow
4697          * allocations of >4GB, but not with the current
4698          * algorithm, so just cast down the size for now.
4699          */
4700         if (size > VM_MAX_ADDRESS)
4701                 return KERN_RESOURCE_SHORTAGE;
4702         if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
4703                                &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS)
4704                 return kr;
4705
4706         cpm_obj = vm_object_allocate((vm_object_size_t)size);
4707         assert(cpm_obj != VM_OBJECT_NULL);
4708         assert(cpm_obj->internal);
4709         assert(cpm_obj->vo_size == (vm_object_size_t)size);
4710         assert(cpm_obj->can_persist == FALSE);
4711         assert(cpm_obj->pager_created == FALSE);
4712         assert(cpm_obj->pageout == FALSE);
4713         assert(cpm_obj->shadow == VM_OBJECT_NULL);
4714
4715         /*
4716          *      Insert pages into object.
4717          */
4718
4719         vm_object_lock(cpm_obj);
4720         for (offset = 0; offset < size; offset += PAGE_SIZE) {
4721                 m = pages;
4722                 pages = NEXT_PAGE(m);
4723                 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
4724
4725                 assert(!m->gobbled);
4726                 assert(!m->wanted);
4727                 assert(!m->pageout);
4728                 assert(!m->tabled);
4729                 assert(VM_PAGE_WIRED(m));
4730                 assert(m->busy);
4731                 assert(VM_PAGE_GET_PHYS_PAGE(m)>=(avail_start>>PAGE_SHIFT) && VM_PAGE_GET_PHYS_PAGE(m)<=(avail_end>>PAGE_SHIFT));
4732
4733                 m->busy = FALSE;
4734                 vm_page_insert(m, cpm_obj, offset);
4735         }
4736         assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
4737         vm_object_unlock(cpm_obj);
4738
4739         /*
4740          *      Hang onto a reference on the object in case a
4741          *      multi-threaded application for some reason decides
4742          *      to deallocate the portion of the address space into
4743          *      which we will insert this object.
4744          *
4745          *      Unfortunately, we must insert the object now before
4746          *      we can talk to the pmap module about which addresses
4747          *      must be wired down.  Hence, the race with a multi-
4748          *      threaded app.
4749          */
4750         vm_object_reference(cpm_obj);
4751
4752         /*
4753          *      Insert object into map.
4754          */
4755
4756         kr = vm_map_enter(
4757                 map,
4758                 addr,
4759                 size,
4760                 (vm_map_offset_t)0,
4761                 flags,
4762                 VM_MAP_KERNEL_FLAGS_NONE,
4763                 cpm_obj,
4764                 (vm_object_offset_t)0,
4765                 FALSE,
4766                 VM_PROT_ALL,
4767                 VM_PROT_ALL,
4768                 VM_INHERIT_DEFAULT);
4769
4770         if (kr != KERN_SUCCESS) {
4771                 /*
4772                  *      A CPM object doesn't have can_persist set,
4773                  *      so all we have to do is deallocate it to
4774                  *      free up these pages.
4775                  */
4776                 assert(cpm_obj->pager_created == FALSE);
4777                 assert(cpm_obj->can_persist == FALSE);
4778                 assert(cpm_obj->pageout == FALSE);
4779                 assert(cpm_obj->shadow == VM_OBJECT_NULL);
4780                 vm_object_deallocate(cpm_obj); /* kill acquired ref */
4781                 vm_object_deallocate(cpm_obj); /* kill creation ref */
4782         }
4783
4784         /*
4785          *      Inform the physical mapping system that the
4786          *      range of addresses may not fault, so that
4787          *      page tables and such can be locked down as well.
4788          */
4789         start = *addr;
4790         end = start + size;
4791         pmap = vm_map_pmap(map);
4792         pmap_pageable(pmap, start, end, FALSE);
4793
4794         /*
4795          *      Enter each page into the pmap, to avoid faults.
4796          *      Note that this loop could be coded more efficiently,
4797          *      if the need arose, rather than looking up each page
4798          *      again.
4799          */
4800         for (offset = 0, va = start; offset < size;
4801              va += PAGE_SIZE, offset += PAGE_SIZE) {
4802                 int type_of_fault;
4803
4804                 vm_object_lock(cpm_obj);
4805                 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
4806                 assert(m != VM_PAGE_NULL);
4807
4808                 vm_page_zero_fill(m);
4809
4810                 type_of_fault = DBG_ZERO_FILL_FAULT;
4811
4812                 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE,
4813                                                 VM_PAGE_WIRED(m),
4814                                                 FALSE, /* change_wiring */
4815                                                 VM_KERN_MEMORY_NONE, /* tag - not wiring */
4816                                                 FALSE, /* no_cache */
4817                                                 FALSE, /* cs_bypass */
4818                                                 0,     /* user_tag */
4819                                             0,     /* pmap_options */
4820                                                 NULL,  /* need_retry */
4821                                                 &type_of_fault);
4822
4823                 vm_object_unlock(cpm_obj);
4824         }
4825
4826 #if     MACH_ASSERT
4827         /*
4828          *      Verify ordering in address space.
4829          */
4830         for (offset = 0; offset < size; offset += PAGE_SIZE) {
4831                 vm_object_lock(cpm_obj);
4832                 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
4833                 vm_object_unlock(cpm_obj);
4834                 if (m == VM_PAGE_NULL)
4835                         panic("vm_allocate_cpm:  obj %p off 0x%llx no page",
4836                               cpm_obj, (uint64_t)offset);
4837                 assert(m->tabled);
4838                 assert(!m->busy);
4839                 assert(!m->wanted);
4840                 assert(!m->fictitious);
4841                 assert(!m->private);
4842                 assert(!m->absent);
4843                 assert(!m->error);
4844                 assert(!m->cleaning);
4845                 assert(!m->laundry);
4846                 assert(!m->precious);
4847                 assert(!m->clustered);
4848                 if (offset != 0) {
4849                         if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
4850                                 printf("start 0x%llx end 0x%llx va 0x%llx\n",
4851                                        (uint64_t)start, (uint64_t)end, (uint64_t)va);
4852                                 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
4853                                 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
4854                                 panic("vm_allocate_cpm:  pages not contig!");
4855                         }
4856                 }
4857                 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
4858         }
4859 #endif  /* MACH_ASSERT */
4860
4861         vm_object_deallocate(cpm_obj); /* kill extra ref */
4862
4863         return kr;
4864 }
4865
4866
4867 #else   /* VM_CPM */
4868
4869 /*
4870  *      Interface is defined in all cases, but unless the kernel
4871  *      is built explicitly for this option, the interface does
4872  *      nothing.
      *
      *      vm_map_enter_cpm:       [ stub built when VM_CPM is not set ]
      *
      *      All four arguments are marked __unused and are ignored;
      *      nothing is mapped and "*addr" is never written.
      *
      *      Returns KERN_FAILURE unconditionally.
4873  */
4874
4875 kern_return_t
4876 vm_map_enter_cpm(
4877         __unused vm_map_t       map,
4878         __unused vm_map_offset_t        *addr,
4879         __unused vm_map_size_t  size,
4880         __unused int            flags)
4881 {
4882         return KERN_FAILURE;
4883 }
4884 #endif /* VM_CPM */
4885
4886 /* Not used without nested pmaps */
4887 #ifndef NO_NESTED_PMAP
4888 /*
4889  * Clip and unnest a portion of a nested submap mapping.
      *
      * Parameters:
      *   map           map that contains "entry"; its pmap is the one that
      *                 gets unnested.
      *   entry         submap entry to operate on.  It must be a submap
      *                 entry with a non-NULL submap and use_pmap set; all
      *                 three conditions are asserted.
      *   start_unnest  start of the requested unnest range.
      *   end_unnest    end of the requested unnest range.
      *
      * The requested range is first handed to
      * pmap_adjust_unnest_parameters(), which may rewrite it in place.  If
      * the (possibly adjusted) range is not fully contained in
      * [entry->vme_start, entry->vme_end) the routine panics.
      *
      * Otherwise "entry" is trimmed to exactly the unnest range, the pmap
      * nesting for that range is removed with pmap_unnest(), and
      * entry->use_pmap is cleared.
      *
      * Note that the trimming uses the low-level _vm_map_clip_start() /
      * _vm_map_clip_end() helpers directly, so the vme_atomic panic and the
      * phys_contiguous pmap_remove() performed by vm_map_clip_start() /
      * vm_map_clip_end() are not applied on this path.
4890  */
4891
4892
4893 static void
4894 vm_map_clip_unnest(
4895         vm_map_t        map,
4896         vm_map_entry_t  entry,
4897         vm_map_offset_t start_unnest,
4898         vm_map_offset_t end_unnest)
4899 {
             /* Remember the caller's range; only used for logging below. */
4900         vm_map_offset_t old_start_unnest = start_unnest;
4901         vm_map_offset_t old_end_unnest = end_unnest;
4902
4903         assert(entry->is_sub_map);
4904         assert(VME_SUBMAP(entry) != NULL);
4905         assert(entry->use_pmap);
4906
4907         /*
4908          * Query the platform for the optimal unnest range.
4909          * DRK: There's some duplication of effort here, since
4910          * callers may have adjusted the range to some extent. This
4911          * routine was introduced to support 1GiB subtree nesting
4912          * for x86 platforms, which can also nest on 2MiB boundaries
4913          * depending on size/alignment.
              *
              * When the call returns true, the pre-adjustment range is
              * reported through log_unnest_badness().
4914          */
4915         if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
4916                 assert(VME_SUBMAP(entry)->is_nested_map);
4917                 assert(!VME_SUBMAP(entry)->disable_vmentry_reuse);
4918                 log_unnest_badness(map,
4919                                    old_start_unnest,
4920                                    old_end_unnest,
4921                                    VME_SUBMAP(entry)->is_nested_map,
4922                                    (entry->vme_start +
4923                                     VME_SUBMAP(entry)->lowest_unnestable_start -
4924                                     VME_OFFSET(entry)));
4925         }
4926
             /* The entry must cover the whole range we are about to unnest. */
4927         if (entry->vme_start > start_unnest ||
4928             entry->vme_end < end_unnest) {
4929                 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
4930                       "bad nested entry: start=0x%llx end=0x%llx\n",
4931                       (long long)start_unnest, (long long)end_unnest,
4932                       (long long)entry->vme_start, (long long)entry->vme_end);
4933         }
4934
             /*
              * Trim the front of the entry, if it starts before the range.
              * After each split the map store is asked to update its
              * first-free information: NULL is passed when the hole list
              * is enabled, the current map->first_free otherwise.
              */
4935         if (start_unnest > entry->vme_start) {
4936                 _vm_map_clip_start(&map->hdr,
4937                                    entry,
4938                                    start_unnest);
4939                 if (map->holelistenabled) {
4940                         vm_map_store_update_first_free(map, NULL, FALSE);
4941                 } else {
4942                         vm_map_store_update_first_free(map, map->first_free, FALSE);
4943                 }
4944         }
             /* Trim the tail of the entry, if it extends past the range. */
4945         if (entry->vme_end > end_unnest) {
4946                 _vm_map_clip_end(&map->hdr,
4947                                  entry,
4948                                  end_unnest);
4949                 if (map->holelistenabled) {
4950                         vm_map_store_update_first_free(map, NULL, FALSE);
4951                 } else {
4952                         vm_map_store_update_first_free(map, map->first_free, FALSE);
4953                 }
4954         }
4955
             /* "entry" now spans exactly the range to unnest (start, size). */
4956         pmap_unnest(map->pmap,
4957                     entry->vme_start,
4958                     entry->vme_end - entry->vme_start);
4959         if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
4960                 /* clean up parent map/maps */
4961                 vm_map_submap_pmap_clean(
4962                         map, entry->vme_start,
4963                         entry->vme_end,
4964                         VME_SUBMAP(entry),
4965                         VME_OFFSET(entry));
4966         }
             /* Mark the entry as no longer using the nested pmap. */
4967         entry->use_pmap = FALSE;
             /* For non-kernel pmaps, retag a SHARED_PMAP alias as UNSHARED_PMAP. */
4968         if ((map->pmap != kernel_pmap) &&
4969             (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
4970                 VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
4971         }
4972 }
4973 #endif  /* NO_NESTED_PMAP */
4974
4975 /*
4976  *      vm_map_clip_start:      [ internal use only ]
4977  *
4978  *      Asserts that the given entry begins at or after
4979  *      the specified address; if necessary,
4980  *      it splits the entry into two.
      *
      *      Parameters:
      *              map             map that owns "entry"
      *              entry           entry to clip
      *              startaddr       address the entry must not start before
      *
      *      If "startaddr" is not greater than entry->vme_start no
      *      split is performed.  Otherwise the part of the entry
      *      below "startaddr" is moved into a new entry linked in
      *      front of "entry", and "entry" itself ends up starting at
      *      "startaddr" (see _vm_map_clip_start).
      *
      *      Unless NO_NESTED_PMAP is defined, a submap entry that has
      *      use_pmap set is first unnested around "startaddr" via
      *      vm_map_clip_unnest().
      *
      *      Panics if a split is required and the entry is marked
      *      vme_atomic.
4981  */
4982 void
4983 vm_map_clip_start(
4984         vm_map_t        map,
4985         vm_map_entry_t  entry,
4986         vm_map_offset_t startaddr)
4987 {
4988 #ifndef NO_NESTED_PMAP
4989         if (entry->is_sub_map &&
4990             entry->use_pmap &&
4991             startaddr >= entry->vme_start) {
4992                 vm_map_offset_t start_unnest, end_unnest;
4993
4994                 /*
4995                  * Make sure "startaddr" is no longer in a nested range
4996                  * before we clip.  Unnest only the minimum range the platform
4997                  * can handle.
4998                  * vm_map_clip_unnest may perform additional adjustments to
4999                  * the unnest range.
                      *
                      * The range is the pmap_nesting_size_min-sized chunk
                      * containing "startaddr"; the mask arithmetic assumes
                      * pmap_nesting_size_min is a power of two (TODO confirm).
5000                  */
5001                 start_unnest = startaddr & ~(pmap_nesting_size_min - 1);
5002                 end_unnest = start_unnest + pmap_nesting_size_min;
5003                 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5004         }
5005 #endif /* NO_NESTED_PMAP */
5006         if (startaddr > entry->vme_start) {
                     /*
                      * For a non-submap entry whose object is physically
                      * contiguous, drop the pmap mappings for the whole
                      * entry range before splitting it.
                      */
5007                 if (VME_OBJECT(entry) &&
5008                     !entry->is_sub_map &&
5009                     VME_OBJECT(entry)->phys_contiguous) {
5010                         pmap_remove(map->pmap,
5011                                     (addr64_t)(entry->vme_start),
5012                                     (addr64_t)(entry->vme_end));
5013                 }
                     /* Atomic entries must never be split. */
5014                 if (entry->vme_atomic) {
5015                         panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5016                 }
5017                 _vm_map_clip_start(&map->hdr, entry, startaddr);
                     /*
                      * Let the map store update its first-free information:
                      * NULL when the hole list is enabled, the current
                      * map->first_free otherwise.
                      */
5018                 if (map->holelistenabled) {
5019                         vm_map_store_update_first_free(map, NULL, FALSE);
5020                 } else {
5021                         vm_map_store_update_first_free(map, map->first_free, FALSE);
5022                 }
5023         }
5024 }
5025
5026
     /*
      *      vm_map_copy_clip_start:
      *
      *      Variant of vm_map_clip_start for the entries of a map copy:
      *      if "startaddr" is greater than the entry's start, the entry
      *      is split at "startaddr" within the copy's header (cpy_hdr)
      *      using _vm_map_clip_start.  Unlike vm_map_clip_start, no
      *      unnesting, pmap_remove, vme_atomic check or first-free
      *      update is done.
      *
      *      "entry" and "startaddr" are each expanded twice, so avoid
      *      arguments with side effects.
      */
5027 #define vm_map_copy_clip_start(copy, entry, startaddr) \
5028         MACRO_BEGIN \
5029         if ((startaddr) > (entry)->vme_start) \
5030                 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
5031         MACRO_END
5032
5033 /*
5034  *      This routine is called only when it is known that
5035  *      the entry must be split.
      *
      *      _vm_map_clip_start:
      *
      *      Splits "entry" at "start".  A new entry is created as a
      *      copy of "entry", truncated to end at "start", and linked
      *      immediately before "entry".  "entry" is then advanced so
      *      that it begins at "start", with its offset increased by
      *      the number of bytes given to the new entry.
      *
      *      Parameters:
      *              map_header      header of the entry list to link into
      *              entry           entry to split; keeps the upper part
      *              start           split address; asserted to lie
      *                              strictly inside the entry, and to be
      *                              aligned to the header's page mask
      *                              when the entry is map_aligned
      *
      *      An extra reference is taken on the submap or object
      *      referenced by the new entry.
5036  */
5037 static void
5038 _vm_map_clip_start(
5039         struct vm_map_header    *map_header,
5040         vm_map_entry_t          entry,
5041         vm_map_offset_t         start)
5042 {
5043         vm_map_entry_t  new_entry;
5044
5045         /*
5046          *      Split off the front portion --
5047          *      note that we must insert the new
5048          *      entry BEFORE this one, so that
5049          *      this entry has the specified starting
5050          *      address.
5051          */
5052
5053         if (entry->map_aligned) {
5054                 assert(VM_MAP_PAGE_ALIGNED(start,
5055                                            VM_MAP_HDR_PAGE_MASK(map_header)));
5056         }
5057
5058         new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
5059         vm_map_entry_copy_full(new_entry, entry);
5060
             /* The new entry is the lower part: [old vme_start, start). */
5061         new_entry->vme_end = start;
5062         assert(new_entry->vme_start < new_entry->vme_end);
             /*
              * Advance the offset while entry->vme_start still holds the
              * old start address; it is overwritten two lines below.
              */
5063         VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
5064         assert(start < entry->vme_end);
5065         entry->vme_start = start;
5066
5067         _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
5068
             /* The new entry holds its own reference on the backing submap/object. */
5069         if (entry->is_sub_map)
5070                 vm_map_reference(VME_SUBMAP(new_entry));
5071         else
5072                 vm_object_reference(VME_OBJECT(new_entry));
5073 }
5074
5075
5076 /*
5077  *      vm_map_clip_end:        [ internal use only ]
5078  *
5079  *      Asserts that the given entry ends at or before
5080  *      the specified address; if necessary,
5081  *      it splits the entry into two.
      *
      *      Parameters:
      *              map             map that owns "entry"
      *              entry           entry to clip
      *              endaddr         address the entry must not extend past;
      *                              values beyond entry->vme_end are
      *                              treated as entry->vme_end
      *
      *      If "endaddr" is below entry->vme_end, the part of the entry
      *      from "endaddr" upward is moved into a new entry linked
      *      after "entry" (see _vm_map_clip_end).
      *
      *      Unless NO_NESTED_PMAP is defined, a submap entry that has
      *      use_pmap set is unnested first, from the entry's start up
      *      to "endaddr" rounded up to pmap_nesting_size_min.  This
      *      happens even when no split turns out to be needed.
      *
      *      Panics if a split is required and the entry is marked
      *      vme_atomic.
5082  */
5083 void
5084 vm_map_clip_end(
5085         vm_map_t        map,
5086         vm_map_entry_t  entry,
5087         vm_map_offset_t endaddr)
5088 {
5089         if (endaddr > entry->vme_end) {
5090                 /*
5091                  * Within the scope of this clipping, limit "endaddr" to
5092                  * the end of this map entry...
5093                  */
5094                 endaddr = entry->vme_end;
5095         }
5096 #ifndef NO_NESTED_PMAP
5097         if (entry->is_sub_map && entry->use_pmap) {
5098                 vm_map_offset_t start_unnest, end_unnest;
5099
5100                 /*
5101                  * Make sure the range between the start of this entry and
5102                  * the new "endaddr" is no longer nested before we clip.
5103                  * Unnest only the minimum range the platform can handle.
5104                  * vm_map_clip_unnest may perform additional adjustments to
5105                  * the unnest range.
                      *
                      * The round-up below assumes pmap_nesting_size_min is a
                      * power of two (TODO confirm).
5106                  */
5107                 start_unnest = entry->vme_start;
5108                 end_unnest =
5109                         (endaddr + pmap_nesting_size_min - 1) &
5110                         ~(pmap_nesting_size_min - 1);
5111                 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5112         }
5113 #endif /* NO_NESTED_PMAP */
5114         if (endaddr < entry->vme_end) {
                     /*
                      * For a non-submap entry whose object is physically
                      * contiguous, drop the pmap mappings for the whole
                      * entry range before splitting it.
                      */
5115                 if (VME_OBJECT(entry) &&
5116                     !entry->is_sub_map &&
5117                     VME_OBJECT(entry)->phys_contiguous) {
5118                         pmap_remove(map->pmap,
5119                                     (addr64_t)(entry->vme_start),
5120                                     (addr64_t)(entry->vme_end));
5121                 }
                     /* Atomic entries must never be split. */
5122                 if (entry->vme_atomic) {
5123                         panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5124                 }
5125                 _vm_map_clip_end(&map->hdr, entry, endaddr);
                     /*
                      * Let the map store update its first-free information:
                      * NULL when the hole list is enabled, the current
                      * map->first_free otherwise.
                      */
5126                 if (map->holelistenabled) {
5127                         vm_map_store_update_first_free(map, NULL, FALSE);
5128                 } else {
5129                         vm_map_store_update_first_free(map, map->first_free, FALSE);
5130                 }
5131         }
5132 }
5133
5134
     /*
      *      vm_map_copy_clip_end:
      *
      *      Variant of vm_map_clip_end for the entries of a map copy:
      *      if "endaddr" is below the entry's end, the entry is split
      *      at "endaddr" within the copy's header (cpy_hdr) using
      *      _vm_map_clip_end.  Unlike vm_map_clip_end, no unnesting,
      *      pmap_remove, vme_atomic check or first-free update is done,
      *      and "endaddr" is not limited to the entry's end.
      *
      *      "entry" and "endaddr" are each expanded twice, so avoid
      *      arguments with side effects.
      */
5135 #define vm_map_copy_clip_end(copy, entry, endaddr) \
5136         MACRO_BEGIN \
5137         if ((endaddr) < (entry)->vme_end) \
5138                 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
5139         MACRO_END
5140
5141 /*
5142  *      This routine is called only when it is known that
5143  *      the entry must be split.
      *
      *      _vm_map_clip_end:
      *
      *      Splits "entry" at "end".  "entry" is truncated so that it
      *      ends at "end"; a new entry, created as a copy of "entry",
      *      starts at "end", keeps the original end address, and is
      *      linked immediately after "entry".  The new entry's offset
      *      is increased by the number of bytes that remain in "entry".
      *
      *      Parameters:
      *              map_header      header of the entry list to link into
      *              entry           entry to split; keeps the lower part
      *              end             split address; asserted to lie
      *                              strictly inside the entry, and to be
      *                              aligned to the header's page mask
      *                              when the entry is map_aligned
      *
      *      An extra reference is taken on the submap or object
      *      referenced by the new entry.
5144  */
5145 static void
5146 _vm_map_clip_end(
5147         struct vm_map_header    *map_header,
5148         vm_map_entry_t          entry,
5149         vm_map_offset_t         end)
5150 {
5151         vm_map_entry_t  new_entry;
5152
5153         /*
5154          *      Create a new entry and insert it
5155          *      AFTER the specified entry
5156          */
5157
5158         if (entry->map_aligned) {
5159                 assert(VM_MAP_PAGE_ALIGNED(end,
5160                                            VM_MAP_HDR_PAGE_MASK(map_header)));
5161         }
5162
5163         new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
5164         vm_map_entry_copy_full(new_entry, entry);
5165
5166         assert(entry->vme_start < end);
             /* "entry" keeps [vme_start, end); new_entry takes [end, old vme_end). */
5167         new_entry->vme_start = entry->vme_end = end;
             /* entry->vme_start is unchanged, so this is the size left in "entry". */
5168         VME_OFFSET_SET(new_entry,
5169                        VME_OFFSET(new_entry) + (end - entry->vme_start));
5170         assert(new_entry->vme_start < new_entry->vme_end);
5171
5172         _vm_map_store_entry_link(map_header, entry, new_entry);
5173
             /* The new entry holds its own reference on the backing submap/object. */
5174         if (entry->is_sub_map)
5175                 vm_map_reference(VME_SUBMAP(new_entry));
5176         else
5177                 vm_object_reference(VME_OBJECT(new_entry));
5178 }
5179
5180
5181 /*
5182  *      VM_MAP_RANGE_CHECK:     [ internal use only ]
5183  *
5184  *      Forces the starting and ending region addresses to
5185  *      fall within the valid range of the map.
      *
      *      Despite the name, nothing is asserted or reported: "start"
      *      is raised to vm_map_min(map) if it is below it, "end" is
      *      lowered to vm_map_max(map) if it is above it, and if
      *      "start" is still greater than "end" it is set to "end"
      *      (yielding an empty range).
      *
      *      "start" and "end" are assigned to and expanded several
      *      times without parentheses, so they must be plain lvalues
      *      without side effects.
5186  */
5187 #define VM_MAP_RANGE_CHECK(map, start, end)     \
5188         MACRO_BEGIN                             \
5189         if (start < vm_map_min(map))            \
5190                 start = vm_map_min(map);        \
5191         if (end > vm_map_max(map))              \
5192                 end = vm_map_max(map);          \
5193         if (start > end)                        \
5194                 start = end;                    \
5195         MACRO_END
5196
5197 /*
5198  *      vm_map_range_check:     [ internal use only ]
5199  *
5200  *      Check that the region defined by the specified start and
5201  *      end addresses are wholly contained within a single map
5202  *      entry or set of adjacent map entries of the specified map,
5203  *      i.e. the specified region contains no unmapped space.
5204  *      If any or all of the region is unmapped, FALSE is returned.
5205  *      Otherwise, TRUE is returned and if the output argument 'entry'
5206  *      is not NULL it points to the map entry containing the start
5207  *      of the region.
5208  *
      *      FALSE is also returned when start is below vm_map_min(map),
      *      end is above vm_map_max(map), or start is greater than end.
      *
      *      Note that '*entry' is written as soon as "start" is found
      *      to be mapped, so it is also set when FALSE is returned
      *      because of a hole further into the region.
      *
5209  *      The map is locked for reading on entry and is left locked.
5210  */
5211 static boolean_t
5212 vm_map_range_check(
5213         vm_map_t                map,
5214         vm_map_offset_t         start,
5215         vm_map_offset_t         end,
5216         vm_map_entry_t          *entry)
5217 {
5218         vm_map_entry_t          cur;
             /* end address of the previously visited entry */
5219         vm_map_offset_t         prev;
5220
5221         /*
5222          *      Basic sanity checks first
5223          */
5224         if (start < vm_map_min(map) || end > vm_map_max(map) || start > end)
5225                 return (FALSE);
5226
5227         /*
5228          *      Check first if the region starts within a valid
5229          *      mapping for the map.
5230          */
5231         if (!vm_map_lookup_entry(map, start, &cur))
5232                 return (FALSE);
5233
5234         /*
5235          *      Optimize for the case that the region is contained
5236          *      in a single map entry.
5237          */
5238         if (entry != (vm_map_entry_t *) NULL)
5239                 *entry = cur;
5240         if (end <= cur->vme_end)
5241                 return (TRUE);
5242
5243         /*
5244          *      If the region is not wholly contained within a
5245          *      single entry, walk the entries looking for holes.
              *      The walk stops at the end of the entry list or at
              *      the first entry that does not start exactly where
              *      the previous one ended.
5246          */
5247         prev = cur->vme_end;
5248         cur = cur->vme_next;
5249         while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
5250                 if (end <= cur->vme_end)
5251                         return (TRUE);
5252                 prev = cur->vme_end;
5253                 cur = cur->vme_next;
5254         }
5255         return (FALSE);
5256 }
5257
5258 /*
5259  *      vm_map_submap:          [ kernel use only ]
5260  *
5261  *      Mark the given range as handled by a subordinate map.
5262  *
5263  *      This range must have been created with vm_map_find using
5264  *      the vm_submap_object, and no other operations may have been
5265  *      performed on this range prior to calling vm_map_submap.
5266  *
5267  *      Only a limited number of operations can be performed
5268  *      within this range after calling vm_map_submap:
5269  *              vm_fault
5270  *      [Don't try vm_map_copyin!]
5271  *
5272  *      To remove a submapping, one must first remove the
5273  *      range from the superior map, and then destroy the
5274  *      submap (if desired).  [Better yet, don't try it.]
      *
      *      Parameters:
      *              map             superior map; locked for the duration
      *                              of the call and unlocked on every
      *                              return path
      *              start, end      range in "map" to hand over
      *              submap          subordinate map; a reference is taken
      *                              on it when the entry is converted
      *              offset          stored as the entry's offset
      *              use_pmap        if TRUE, also nest the submap's pmap
      *                              into the map's pmap (ignored when
      *                              NO_NESTED_PMAP is defined)
      *
      *      Returns:
      *              KERN_SUCCESS            the entry now refers to "submap"
      *              KERN_INVALID_ARGUMENT   no entry exists at or after
      *                                      "start", the entry is already a
      *                                      submap, or after clipping the
      *                                      entry does not cover exactly
      *                                      [start, end) backed by an
      *                                      untouched vm_submap_object
      *              KERN_NO_SPACE           a pmap could not be created for
      *                                      the submap
      *
      *      Panics if pmap_nest() fails.
      *
      *      NOTE(review): the entry is clipped to [start, end) before
      *      the validity test, so the clipping is not undone when
      *      KERN_INVALID_ARGUMENT is returned by that test.  Likewise
      *      the KERN_NO_SPACE return happens after the entry has
      *      already been converted to a submap entry and the submap
      *      reference has been taken.
5275  */
5276 kern_return_t
5277 vm_map_submap(
5278         vm_map_t        map,
5279         vm_map_offset_t start,
5280         vm_map_offset_t end,
5281         vm_map_t        submap,
5282         vm_map_offset_t offset,
5283 #ifdef NO_NESTED_PMAP
5284         __unused
5285 #endif  /* NO_NESTED_PMAP */
5286         boolean_t       use_pmap)
5287 {
5288         vm_map_entry_t          entry;
5289         kern_return_t           result = KERN_INVALID_ARGUMENT;
5290         vm_object_t             object;
5291
5292         vm_map_lock(map);
5293
             /* If "start" is not mapped, continue with the entry after the lookup result. */
5294         if (! vm_map_lookup_entry(map, start, &entry)) {
5295                 entry = entry->vme_next;
5296         }
5297
5298         if (entry == vm_map_to_entry(map) ||
5299             entry->is_sub_map) {
5300                 vm_map_unlock(map);
5301                 return KERN_INVALID_ARGUMENT;
5302         }
5303
5304         vm_map_clip_start(map, entry, start);
5305         vm_map_clip_end(map, entry, end);
5306
             /*
              * Only convert an entry that matches the range exactly and
              * is still backed by a pristine vm_submap_object: no resident
              * pages, no copy or shadow object, and no pager created.
              */
5307         if ((entry->vme_start == start) && (entry->vme_end == end) &&
5308             (!entry->is_sub_map) &&
5309             ((object = VME_OBJECT(entry)) == vm_submap_object) &&
5310             (object->resident_page_count == 0) &&
5311             (object->copy == VM_OBJECT_NULL) &&
5312             (object->shadow == VM_OBJECT_NULL) &&
5313             (!object->pager_created)) {
                     /*
                      * Swap the entry's object for the submap: drop the
                      * entry's object reference and take one on the submap.
                      */
5314                 VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
5315                 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
5316                 vm_object_deallocate(object);
5317                 entry->is_sub_map = TRUE;
5318                 entry->use_pmap = FALSE;
5319                 VME_SUBMAP_SET(entry, submap);
5320                 vm_map_reference(submap);
5321                 if (submap->mapped_in_other_pmaps == FALSE &&
5322                     vm_map_pmap(submap) != PMAP_NULL &&
5323                     vm_map_pmap(submap) != vm_map_pmap(map)) {
5324                         /*
5325                          * This submap is being mapped in a map
5326                          * that uses a different pmap.
5327                          * Set its "mapped_in_other_pmaps" flag
5328                          * to indicate that we now need to
5329                          * remove mappings from all pmaps rather
5330                          * than just the submap's pmap.
5331                          */
5332                         submap->mapped_in_other_pmaps = TRUE;
5333                 }
5334
5335 #ifndef NO_NESTED_PMAP
5336                 if (use_pmap) {
5337                         /* nest if platform code will allow */
                             /* Create the submap's pmap on first use, charged to the parent pmap's ledger. */
5338                         if(submap->pmap == NULL) {
5339                                 ledger_t ledger = map->pmap->ledger;
5340                                 submap->pmap = pmap_create(ledger,
5341                                                 (vm_map_size_t) 0, FALSE);
5342                                 if(submap->pmap == PMAP_NULL) {
5343                                         vm_map_unlock(map);
5344                                         return(KERN_NO_SPACE);
5345                                 }
5346 #if     defined(__arm__) || defined(__arm64__)
5347                                 pmap_set_nested(submap->pmap);
5348 #endif
5349                         }
                             /*
                              * "start" is passed for both address arguments
                              * of pmap_nest(); "offset" is not used here.
                              */
5350                         result = pmap_nest(map->pmap,
5351                                            (VME_SUBMAP(entry))->pmap,
5352                                            (addr64_t)start,
5353                                            (addr64_t)start,
5354                                            (uint64_t)(end - start));
5355                         if(result)
5356                                 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
                             /* Nesting succeeded: record that this entry uses the nested pmap. */
5357                         entry->use_pmap = TRUE;
5358                 }
5359 #else   /* NO_NESTED_PMAP */
5360                 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
5361 #endif  /* NO_NESTED_PMAP */
5362                 result = KERN_SUCCESS;
5363         }
5364         vm_map_unlock(map);
5365
5366         return(result);
5367 }
5368
5369 #if CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG)
5370 #include <sys/codesign.h>
5371 extern int proc_selfcsflags(void);
5372 extern int panic_on_unsigned_execute;
5373 #endif /* CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG) */
5374
5375 /*
5376  *      vm_map_protect:
5377  *
5378  *      Sets the protection of the specified address
5379  *      region in the target map.  If "set_max" is
5380  *      specified, the maximum protection is to be set;
5381  *      otherwise, only the current protection is affected.
5382  */
5383 kern_return_t
5384 vm_map_protect(
5385         vm_map_t        map,
5386         vm_map_offset_t start,
5387         vm_map_offset_t end,
5388         vm_prot_t       new_prot,
5389         boolean_t       set_max)
5390 {
5391         vm_map_entry_t                  current;
5392         vm_map_offset_t                 prev;
5393         vm_map_entry_t                  entry;
5394         vm_prot_t                       new_max;
5395         int                             pmap_options = 0;
5396         kern_return_t                   kr;
5397
5398         XPR(XPR_VM_MAP,
5399             "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d",
5400             map, start, end, new_prot, set_max);
5401
5402         if (new_prot & VM_PROT_COPY) {
5403                 vm_map_offset_t         new_start;
5404                 vm_prot_t               cur_prot, max_prot;
5405                 vm_map_kernel_flags_t   kflags;
5406
5407                 /* LP64todo - see below */
5408                 if (start >= map->max_offset) {
5409                         return KERN_INVALID_ADDRESS;
5410                 }
5411
5412                 kflags = VM_MAP_KERNEL_FLAGS_NONE;
5413                 kflags.vmkf_remap_prot_copy = TRUE;
5414                 new_start = start;
5415                 kr = vm_map_remap(map,
5416                                   &new_start,
5417                                   end - start,
5418                                   0, /* mask */
5419                                   VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
5420                                   kflags,
5421                                   0,
5422                                   map,
5423                                   start,
5424                                   TRUE, /* copy-on-write remapping! */
5425                                   &cur_prot,
5426                                   &max_prot,
5427                                   VM_INHERIT_DEFAULT);
5428                 if (kr != KERN_SUCCESS) {
5429                         return kr;
5430                 }
5431                 new_prot &= ~VM_PROT_COPY;
5432         }
5433
5434         vm_map_lock(map);
5435
5436         /* LP64todo - remove this check when vm_map_commpage64()
5437          * no longer has to stuff in a map_entry for the commpage
5438          * above the map's max_offset.
5439          */
5440         if (start >= map->max_offset) {
5441                 vm_map_unlock(map);
5442                 return(KERN_INVALID_ADDRESS);
5443         }
5444
5445         while(1) {
5446                 /*
5447                  *      Lookup the entry.  If it doesn't start in a valid
5448                  *      entry, return an error.
5449                  */
5450                 if (! vm_map_lookup_entry(map, start, &entry)) {
5451                         vm_map_unlock(map);
5452                         return(KERN_INVALID_ADDRESS);
5453                 }
5454
5455                 if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */
5456                         start = SUPERPAGE_ROUND_DOWN(start);
5457                         continue;
5458                 }
5459                 break;
5460         }
5461         if (entry->superpage_size)
5462                 end = SUPERPAGE_ROUND_UP(end);
5463
5464         /*
5465          *      Make a first pass to check for protection and address
5466          *      violations.
5467          */
5468
5469         current = entry;
5470         prev = current->vme_start;
5471         while ((current != vm_map_to_entry(map)) &&
5472                (current->vme_start < end)) {
5473
5474                 /*
5475                  * If there is a hole, return an error.
5476                  */
5477                 if (current->vme_start != prev) {
5478                         vm_map_unlock(map);
5479                         return(KERN_INVALID_ADDRESS);
5480                 }
5481
5482                 new_max = current->max_protection;
5483                 if ((new_prot & new_max) != new_prot) {
5484                         vm_map_unlock(map);
5485                         return(KERN_PROTECTION_FAILURE);
5486                 }
5487
5488 #if CONFIG_EMBEDDED
5489                 if (new_prot & VM_PROT_WRITE) {
5490                         if ((new_prot & VM_PROT_EXECUTE) && !(current->used_for_jit)) {
5491                                 printf("EMBEDDED: %s can't have both write and exec at the same time\n", __FUNCTION__);
5492                                 new_prot &= ~VM_PROT_EXECUTE;
5493                         }
5494                 }
5495 #endif
5496
5497                 prev = current->vme_end;
5498                 current = current->vme_next;
5499         }
5500
5501 #if __arm64__
5502         if (end > prev &&
5503             end == vm_map_round_page(prev, VM_MAP_PAGE_MASK(map))) {
5504                 vm_map_entry_t prev_entry;
5505
5506                 prev_entry = current->vme_prev;
5507                 if (prev_entry != vm_map_to_entry(map) &&
5508                     !prev_entry->map_aligned &&
5509                     (vm_map_round_page(prev_entry->vme_end,
5510                                        VM_MAP_PAGE_MASK(map))
5511                      == end)) {
5512                         /*
5513                          * The last entry in our range is not "map-aligned"
5514                          * but it would have reached all the way to "end"
5515                          * if it had been map-aligned, so this is not really
5516                          * a hole in the range and we can proceed.
5517                          */
5518                         prev = end;
5519                 }
5520         }
5521 #endif /* __arm64__ */
5522
5523         if (end > prev) {
5524                 vm_map_unlock(map);
5525                 return(KERN_INVALID_ADDRESS);
5526         }
5527
5528         /*
5529          *      Go back and fix up protections.
5530          *      Clip to start here if the range starts within
5531          *      the entry.
5532          */
5533
5534         current = entry;
5535         if (current != vm_map_to_entry(map)) {
5536                 /* clip and unnest if necessary */
5537                 vm_map_clip_start(map, current, start);
5538         }
5539
5540         while ((current != vm_map_to_entry(map)) &&
5541                (current->vme_start < end)) {
5542
5543                 vm_prot_t       old_prot;
5544
5545                 vm_map_clip_end(map, current, end);
5546
5547                 if (current->is_sub_map) {
5548                         /* clipping did unnest if needed */
5549                         assert(!current->use_pmap);
5550                 }
5551
5552                 old_prot = current->protection;
5553
5554                 if (set_max) {
5555                         current->max_protection = new_prot;
5556                         current->protection = new_prot & old_prot;
5557                 } else {
5558                         current->protection = new_prot;
5559                 }
5560
5561                 /*
5562                  *      Update physical map if necessary.
5563                  *      If the request is to turn off write protection,
5564                  *      we won't do it for real (in pmap). This is because
5565                  *      it would cause copy-on-write to fail.  We've already
5566                  *      set, the new protection in the map, so if a
5567                  *      write-protect fault occurred, it will be fixed up
5568                  *      properly, COW or not.
5569                  */
5570                 if (current->protection != old_prot) {
5571                         /* Look one level in we support nested pmaps */
5572                         /* from mapped submaps which are direct entries */
5573                         /* in our map */
5574
5575                         vm_prot_t prot;
5576
5577                         prot = current->protection;
5578                         if (current->is_sub_map || (VME_OBJECT(current) == NULL) || (VME_OBJECT(current) != compressor_object)) {
5579                                 prot &= ~VM_PROT_WRITE;
5580                         } else {
5581                                 assert(!VME_OBJECT(current)->code_signed);
5582                                 assert(VME_OBJECT(current)->copy_strategy == MEMORY_OBJECT_COPY_NONE);
5583                         }
5584
5585                         if (override_nx(map, VME_ALIAS(current)) && prot)
5586                                 prot |= VM_PROT_EXECUTE;
5587
5588 #if CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG)
5589                         if (!(old_prot & VM_PROT_EXECUTE) &&
5590                             (prot & VM_PROT_EXECUTE) &&
5591                             (proc_selfcsflags() & CS_KILL) &&
5592                             panic_on_unsigned_execute) {
5593                                 panic("vm_map_protect(%p,0x%llx,0x%llx) old=0x%x new=0x%x - <rdar://23770418> code-signing bypass?\n", map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, old_prot, prot);
5594                         }
5595 #endif /* CONFIG_EMBEDDED && (DEVELOPMENT || DEBUG) */
5596
5597                         if (pmap_has_prot_policy(prot)) {
5598                                 if (current->wired_count) {
5599                                         panic("vm_map_protect(%p,0x%llx,0x%llx) new=0x%x wired=%x\n",
5600                                               map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, prot, current->wired_count);
5601                                 }
5602
5603                                 /* If the pmap layer cares about this
5604                                  * protection type, force a fault for
5605                                  * each page so that vm_fault will
5606                                  * repopulate the page with the full
5607                                  * set of protections.
5608                                  */
5609                                 /*
5610                                  * TODO: We don't seem to need this,
5611                                  * but this is due to an internal
5612                                  * implementation detail of
5613                                  * pmap_protect.  Do we want to rely
5614                                  * on this?
5615                                  */
5616                                 prot = VM_PROT_NONE;
5617                         }
5618
5619                         if (current->is_sub_map && current->use_pmap) {
5620                                 pmap_protect(VME_SUBMAP(current)->pmap,
5621                                              current->vme_start,
5622                                              current->vme_end,
5623                                              prot);
5624                         } else {
5625                                 if (prot & VM_PROT_WRITE) {
5626                                         if (VME_OBJECT(current) == compressor_object) {
5627                                                 /*
5628                                                  * For write requests on the
5629                                                  * compressor, we wil ask the
5630                                                  * pmap layer to prevent us from
5631                                                  * taking a write fault when we
5632                                                  * attempt to access the mapping
5633                                                  * next.
5634                                                  */
5635                                                 pmap_options |= PMAP_OPTIONS_PROTECT_IMMEDIATE;
5636                                         }
5637                                 }
5638
5639                                 pmap_protect_options(map->pmap,
5640                                                      current->vme_start,
5641                                                      current->vme_end,
5642                                                      prot,
5643                                                      pmap_options,
5644                                                      NULL);
5645                         }
5646                 }
5647                 current = current->vme_next;
5648         }
5649
5650         current = entry;
5651         while ((current != vm_map_to_entry(map)) &&
5652                (current->vme_start <= end)) {
5653                 vm_map_simplify_entry(map, current);
5654                 current = current->vme_next;
5655         }
5656
5657         vm_map_unlock(map);
5658         return(KERN_SUCCESS);
5659 }
5660
5661 /*
5662  *      vm_map_inherit:
5663  *
5664  *      Sets the inheritance of the specified address
5665  *      range in the target map.  Inheritance
5666  *      affects how the map will be shared with
5667  *      child maps at the time of vm_map_fork.
5668  */
5669 kern_return_t
5670 vm_map_inherit(
5671         vm_map_t        map,
5672         vm_map_offset_t start,
5673         vm_map_offset_t end,
5674         vm_inherit_t    new_inheritance)
5675 {
5676         vm_map_entry_t  entry;
5677         vm_map_entry_t  temp_entry;
5678
5679         vm_map_lock(map);
5680
5681         VM_MAP_RANGE_CHECK(map, start, end);
5682
5683         if (vm_map_lookup_entry(map, start, &temp_entry)) {
5684                 entry = temp_entry;
5685         }
5686         else {
5687                 temp_entry = temp_entry->vme_next;
5688                 entry = temp_entry;
5689         }
5690
5691         /* first check entire range for submaps which can't support the */
5692         /* given inheritance. */
5693         while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
5694                 if(entry->is_sub_map) {
5695                         if(new_inheritance == VM_INHERIT_COPY) {
5696                                 vm_map_unlock(map);
5697                                 return(KERN_INVALID_ARGUMENT);
5698                         }
5699                 }
5700
5701                 entry = entry->vme_next;
5702         }
5703
5704         entry = temp_entry;
5705         if (entry != vm_map_to_entry(map)) {
5706                 /* clip and unnest if necessary */
5707                 vm_map_clip_start(map, entry, start);
5708         }
5709
5710         while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
5711                 vm_map_clip_end(map, entry, end);
5712                 if (entry->is_sub_map) {
5713                         /* clip did unnest if needed */
5714                         assert(!entry->use_pmap);
5715                 }
5716
5717                 entry->inheritance = new_inheritance;
5718
5719                 entry = entry->vme_next;
5720         }
5721
5722         vm_map_unlock(map);
5723         return(KERN_SUCCESS);
5724 }
5725
5726 /*
5727  * Update the accounting for the amount of wired memory in this map.  If the user has
5728  * exceeded the defined limits, then we fail.  Wiring on behalf of the kernel never fails.
5729  */
5730
5731 static kern_return_t
5732 add_wire_counts(
5733         vm_map_t        map,
5734         vm_map_entry_t  entry,
5735         boolean_t       user_wire)
5736 {
5737         vm_map_size_t   size;
5738
5739         if (user_wire) {
5740                 unsigned int total_wire_count =  vm_page_wire_count + vm_lopage_free_count;
5741
5742                 /*
5743                  * We're wiring memory at the request of the user.  Check if this is the first time the user is wiring
5744                  * this map entry.
5745                  */
5746
5747                 if (entry->user_wired_count == 0) {
5748                         size = entry->vme_end - entry->vme_start;
5749
5750                         /*
5751                          * Since this is the first time the user is wiring this map entry, check to see if we're
5752                          * exceeding the user wire limits.  There is a per map limit which is the smaller of either
5753                          * the process's rlimit or the global vm_user_wire_limit which caps this value.  There is also
5754                          * a system-wide limit on the amount of memory all users can wire.  If the user is over either
5755                          * limit, then we fail.
5756                          */
5757
5758                         if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) ||
5759                            size + ptoa_64(total_wire_count) > vm_global_user_wire_limit ||
5760                            size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount)
5761                                 return KERN_RESOURCE_SHORTAGE;
5762
5763                         /*
5764                          * The first time the user wires an entry, we also increment the wired_count and add this to
5765                          * the total that has been wired in the map.
5766                          */
5767
5768                         if (entry->wired_count >= MAX_WIRE_COUNT)
5769                                 return KERN_FAILURE;
5770
5771                         entry->wired_count++;
5772                         map->user_wire_size += size;
5773                 }
5774
5775                 if (entry->user_wired_count >= MAX_WIRE_COUNT)
5776                         return KERN_FAILURE;
5777
5778                 entry->user_wired_count++;
5779
5780         } else {
5781
5782                 /*
5783                  * The kernel's wiring the memory.  Just bump the count and continue.
5784                  */
5785
5786                 if (entry->wired_count >= MAX_WIRE_COUNT)
5787                         panic("vm_map_wire: too many wirings");
5788
5789                 entry->wired_count++;
5790         }
5791
5792         return KERN_SUCCESS;
5793 }
5794
5795 /*
5796  * Update the memory wiring accounting now that the given map entry is being unwired.
5797  */
5798
5799 static void
5800 subtract_wire_counts(
5801         vm_map_t        map,
5802         vm_map_entry_t  entry,
5803         boolean_t       user_wire)
5804 {
5805
5806         if (user_wire) {
5807
5808                 /*
5809                  * We're unwiring memory at the request of the user.  See if we're removing the last user wire reference.
5810                  */
5811
5812                 if (entry->user_wired_count == 1) {
5813
5814                         /*
5815                          * We're removing the last user wire reference.  Decrement the wired_count and the total
5816                          * user wired memory for this map.
5817                          */
5818
5819                         assert(entry->wired_count >= 1);
5820                         entry->wired_count--;
5821                         map->user_wire_size -= entry->vme_end - entry->vme_start;
5822                 }
5823
5824                 assert(entry->user_wired_count >= 1);
5825                 entry->user_wired_count--;
5826
5827         } else {
5828
5829                 /*
5830                  * The kernel is unwiring the memory.   Just update the count.
5831                  */
5832
5833                 assert(entry->wired_count >= 1);
5834                 entry->wired_count--;
5835         }
5836 }
5837
#if CONFIG_EMBEDDED
/*
 * NOTE(review): defined only when CONFIG_EMBEDDED is set, initial value 0.
 * The code that reads or updates it is not visible in this part of the
 * file; judging by the name it presumably relates to wiring code-signed
 * executable mappings -- TODO confirm against its users.
 */
int cs_executable_wire = 0;
#endif /* CONFIG_EMBEDDED */
5841
5842 /*
5843  *      vm_map_wire:
5844  *
5845  *      Sets the pageability of the specified address range in the
5846  *      target map as wired.  Regions specified as not pageable require
5847  *      locked-down physical memory and physical page maps.  The
5848  *      access_type variable indicates types of accesses that must not
5849  *      generate page faults.  This is checked against protection of
5850  *      memory being locked-down.
5851  *
5852  *      The map must not be locked, but a reference must remain to the
5853  *      map throughout the call.
5854  */
5855 static kern_return_t
5856 vm_map_wire_nested(
5857         vm_map_t                map,
5858         vm_map_offset_t         start,
5859         vm_map_offset_t         end,
5860         vm_prot_t               caller_prot,
5861         vm_tag_t                tag,
5862         boolean_t               user_wire,
5863         pmap_t                  map_pmap,
5864         vm_map_offset_t         pmap_addr,
5865         ppnum_t                 *physpage_p)
5866 {
5867         vm_map_entry_t          entry;
5868         vm_prot_t               access_type;
5869         struct vm_map_entry     *first_entry, tmp_entry;
5870         vm_map_t                real_map;
5871         vm_map_offset_t         s,e;
5872         kern_return_t           rc;
5873         boolean_t               need_wakeup;
5874         boolean_t               main_map = FALSE;
5875         wait_interrupt_t        interruptible_state;
5876         thread_t                cur_thread;
5877         unsigned int            last_timestamp;
5878         vm_map_size_t           size;
5879         boolean_t               wire_and_extract;
5880
5881         access_type = (caller_prot & VM_PROT_ALL);
5882
5883         wire_and_extract = FALSE;
5884         if (physpage_p != NULL) {
5885                 /*
5886                  * The caller wants the physical page number of the
5887                  * wired page.  We return only one physical page number
5888                  * so this works for only one page at a time.
5889                  */
5890                 if ((end - start) != PAGE_SIZE) {
5891                         return KERN_INVALID_ARGUMENT;
5892                 }
5893                 wire_and_extract = TRUE;
5894                 *physpage_p = 0;
5895         }
5896
5897         vm_map_lock(map);
5898         if(map_pmap == NULL)
5899                 main_map = TRUE;
5900         last_timestamp = map->timestamp;
5901
5902         VM_MAP_RANGE_CHECK(map, start, end);
5903         assert(page_aligned(start));
5904         assert(page_aligned(end));
5905         assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
5906         assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
5907         if (start == end) {
5908                 /* We wired what the caller asked for, zero pages */
5909                 vm_map_unlock(map);
5910                 return KERN_SUCCESS;
5911         }
5912
5913         need_wakeup = FALSE;
5914         cur_thread = current_thread();
5915
5916         s = start;
5917         rc = KERN_SUCCESS;
5918
5919         if (vm_map_lookup_entry(map, s, &first_entry)) {
5920                 entry = first_entry;
5921                 /*
5922                  * vm_map_clip_start will be done later.
5923                  * We don't want to unnest any nested submaps here !
5924                  */
5925         } else {
5926                 /* Start address is not in map */
5927                 rc = KERN_INVALID_ADDRESS;
5928                 goto done;
5929         }
5930
5931         while ((entry != vm_map_to_entry(map)) && (s < end)) {
5932                 /*
5933                  * At this point, we have wired from "start" to "s".
5934                  * We still need to wire from "s" to "end".
5935                  *
5936                  * "entry" hasn't been clipped, so it could start before "s"
5937                  * and/or end after "end".
5938                  */
5939
5940                 /* "e" is how far we want to wire in this entry */
5941                 e = entry->vme_end;
5942                 if (e > end)
5943                         e = end;
5944
5945                 /*
5946                  * If another thread is wiring/unwiring this entry then
5947                  * block after informing other thread to wake us up.
5948                  */
5949                 if (entry->in_transition) {
5950                         wait_result_t wait_result;
5951
5952                         /*
5953                          * We have not clipped the entry.  Make sure that
5954                          * the start address is in range so that the lookup
5955                          * below will succeed.
5956                          * "s" is the current starting point: we've already
5957                          * wired from "start" to "s" and we still have
5958                          * to wire from "s" to "end".
5959                          */
5960
5961                         entry->needs_wakeup = TRUE;
5962
5963                         /*
5964                          * wake up anybody waiting on entries that we have
5965                          * already wired.
5966                          */
5967                         if (need_wakeup) {
5968                                 vm_map_entry_wakeup(map);
5969                                 need_wakeup = FALSE;
5970                         }
5971                         /*
5972                          * User wiring is interruptible
5973                          */
5974                         wait_result = vm_map_entry_wait(map,
5975                                                         (user_wire) ? THREAD_ABORTSAFE :
5976                                                         THREAD_UNINT);
5977                         if (user_wire && wait_result == THREAD_INTERRUPTED) {
5978                                 /*
5979                                  * undo the wirings we have done so far
5980                                  * We do not clear the needs_wakeup flag,
5981                                  * because we cannot tell if we were the
5982                                  * only one waiting.
5983                                  */
5984                                 rc = KERN_FAILURE;
5985                                 goto done;
5986                         }
5987
5988                         /*
5989                          * Cannot avoid a lookup here. reset timestamp.
5990                          */
5991                         last_timestamp = map->timestamp;
5992
5993                         /*
5994                          * The entry could have been clipped, look it up again.
5995                          * Worse that can happen is, it may not exist anymore.
5996                          */
5997                         if (!vm_map_lookup_entry(map, s, &first_entry)) {
5998                                 /*
5999                                  * User: undo everything upto the previous
6000                                  * entry.  let vm_map_unwire worry about
6001                                  * checking the validity of the range.
6002                                  */
6003                                 rc = KERN_FAILURE;
6004                                 goto done;
6005                         }
6006                         entry = first_entry;
6007                         continue;
6008                 }
6009
6010                 if (entry->is_sub_map) {
6011                         vm_map_offset_t sub_start;
6012                         vm_map_offset_t sub_end;
6013                         vm_map_offset_t local_start;
6014                         vm_map_offset_t local_end;
6015                         pmap_t          pmap;
6016
6017                         if (wire_and_extract) {
6018                                 /*
6019                                  * Wiring would result in copy-on-write
6020                                  * which would not be compatible with
6021                                  * the sharing we have with the original
6022                                  * provider of this memory.
6023                                  */
6024                                 rc = KERN_INVALID_ARGUMENT;
6025                                 goto done;
6026                         }
6027
6028                         vm_map_clip_start(map, entry, s);
6029                         vm_map_clip_end(map, entry, end);
6030
6031                         sub_start = VME_OFFSET(entry);
6032                         sub_end = entry->vme_end;
6033                         sub_end += VME_OFFSET(entry) - entry->vme_start;
6034
6035                         local_end = entry->vme_end;
6036                         if(map_pmap == NULL) {
6037                                 vm_object_t             object;
6038                                 vm_object_offset_t      offset;
6039                                 vm_prot_t               prot;
6040                                 boolean_t               wired;
6041                                 vm_map_entry_t          local_entry;
6042                                 vm_map_version_t         version;
6043                                 vm_map_t                lookup_map;
6044
6045                                 if(entry->use_pmap) {
6046                                         pmap = VME_SUBMAP(entry)->pmap;
6047                                         /* ppc implementation requires that */
6048                                         /* submaps pmap address ranges line */
6049                                         /* up with parent map */
6050 #ifdef notdef
6051                                         pmap_addr = sub_start;
6052 #endif
6053                                         pmap_addr = s;
6054                                 } else {
6055                                         pmap = map->pmap;
6056                                         pmap_addr = s;
6057                                 }
6058
6059                                 if (entry->wired_count) {
6060                                         if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
6061                                                 goto done;
6062
6063                                         /*
6064                                          * The map was not unlocked:
6065                                          * no need to goto re-lookup.
6066                                          * Just go directly to next entry.
6067                                          */
6068                                         entry = entry->vme_next;
6069                                         s = entry->vme_start;
6070                                         continue;
6071
6072                                 }
6073
6074                                 /* call vm_map_lookup_locked to */
6075                                 /* cause any needs copy to be   */
6076                                 /* evaluated */
6077                                 local_start = entry->vme_start;
6078                                 lookup_map = map;
6079                                 vm_map_lock_write_to_read(map);
6080                                 if(vm_map_lookup_locked(
6081                                            &lookup_map, local_start,
6082                                            access_type | VM_PROT_COPY,
6083                                            OBJECT_LOCK_EXCLUSIVE,
6084                                            &version, &object,
6085                                            &offset, &prot, &wired,
6086                                            NULL,
6087                                            &real_map)) {
6088
6089                                         vm_map_unlock_read(lookup_map);
6090                                         assert(map_pmap == NULL);
6091                                         vm_map_unwire(map, start,
6092                                                       s, user_wire);
6093                                         return(KERN_FAILURE);
6094                                 }
6095                                 vm_object_unlock(object);
6096                                 if(real_map != lookup_map)
6097                                         vm_map_unlock(real_map);
6098                                 vm_map_unlock_read(lookup_map);
6099                                 vm_map_lock(map);
6100
6101                                 /* we unlocked, so must re-lookup */
6102                                 if (!vm_map_lookup_entry(map,
6103                                                          local_start,
6104                                                          &local_entry)) {
6105                                         rc = KERN_FAILURE;
6106                                         goto done;
6107                                 }
6108
6109                                 /*
6110                                  * entry could have been "simplified",
6111                                  * so re-clip
6112                                  */
6113                                 entry = local_entry;
6114                                 assert(s == local_start);
6115                                 vm_map_clip_start(map, entry, s);
6116                                 vm_map_clip_end(map, entry, end);
6117                                 /* re-compute "e" */
6118                                 e = entry->vme_end;
6119                                 if (e > end)
6120                                         e = end;
6121
6122                                 /* did we have a change of type? */
6123                                 if (!entry->is_sub_map) {
6124                                         last_timestamp = map->timestamp;
6125                                         continue;
6126                                 }
6127                         } else {
6128                                 local_start = entry->vme_start;
6129                                 pmap = map_pmap;
6130                         }
6131
6132                         if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
6133                                 goto done;
6134
6135                         entry->in_transition = TRUE;
6136
6137                         vm_map_unlock(map);
6138                         rc = vm_map_wire_nested(VME_SUBMAP(entry),
6139                                                 sub_start, sub_end,
6140                                                 caller_prot, tag,
6141                                                 user_wire, pmap, pmap_addr,
6142                                                 NULL);
6143                         vm_map_lock(map);
6144
6145                         /*
6146                          * Find the entry again.  It could have been clipped
6147                          * after we unlocked the map.
6148                          */
6149                         if (!vm_map_lookup_entry(map, local_start,
6150                                                  &first_entry))
6151                                 panic("vm_map_wire: re-lookup failed");
6152                         entry = first_entry;
6153
6154                         assert(local_start == s);
6155                         /* re-compute "e" */
6156                         e = entry->vme_end;
6157                         if (e > end)
6158                                 e = end;
6159
6160                         last_timestamp = map->timestamp;
6161                         while ((entry != vm_map_to_entry(map)) &&
6162                                (entry->vme_start < e)) {
6163                                 assert(entry->in_transition);
6164                                 entry->in_transition = FALSE;
6165                                 if (entry->needs_wakeup) {
6166                                         entry->needs_wakeup = FALSE;
6167                                         need_wakeup = TRUE;
6168                                 }
6169                                 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
6170                                         subtract_wire_counts(map, entry, user_wire);
6171                                 }
6172                                 entry = entry->vme_next;
6173                         }
6174                         if (rc != KERN_SUCCESS) {       /* from vm_*_wire */
6175                                 goto done;
6176                         }
6177
6178                         /* no need to relookup again */
6179                         s = entry->vme_start;
6180                         continue;
6181                 }
6182
6183                 /*
6184                  * If this entry is already wired then increment
6185                  * the appropriate wire reference count.
6186                  */
6187                 if (entry->wired_count) {
6188
6189                         if ((entry->protection & access_type) != access_type) {
6190                                 /* found a protection problem */
6191
6192                                 /*
6193                                  * XXX FBDP
6194                                  * We should always return an error
6195                                  * in this case but since we didn't
6196                                  * enforce it before, let's do
6197                                  * it only for the new "wire_and_extract"
6198                                  * code path for now...
6199                                  */
6200                                 if (wire_and_extract) {
6201                                         rc = KERN_PROTECTION_FAILURE;
6202                                         goto done;
6203                                 }
6204                         }
6205
6206                         /*
6207                          * entry is already wired down, get our reference
6208                          * after clipping to our range.
6209                          */
6210                         vm_map_clip_start(map, entry, s);
6211                         vm_map_clip_end(map, entry, end);
6212
6213                         if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
6214                                 goto done;
6215
6216                         if (wire_and_extract) {
6217                                 vm_object_t             object;
6218                                 vm_object_offset_t      offset;
6219                                 vm_page_t               m;
6220
6221                                 /*
6222                                  * We don't have to "wire" the page again
6223                                  * bit we still have to "extract" its
6224                                  * physical page number, after some sanity
6225                                  * checks.
6226                                  */
6227                                 assert((entry->vme_end - entry->vme_start)
6228                                        == PAGE_SIZE);
6229                                 assert(!entry->needs_copy);
6230                                 assert(!entry->is_sub_map);
6231                                 assert(VME_OBJECT(entry));
6232                                 if (((entry->vme_end - entry->vme_start)
6233                                      != PAGE_SIZE) ||
6234                                     entry->needs_copy ||
6235                                     entry->is_sub_map ||
6236                                     VME_OBJECT(entry) == VM_OBJECT_NULL) {
6237                                         rc = KERN_INVALID_ARGUMENT;
6238                                         goto done;
6239                                 }
6240
6241                                 object = VME_OBJECT(entry);
6242                                 offset = VME_OFFSET(entry);
6243                                 /* need exclusive lock to update m->dirty */
6244                                 if (entry->protection & VM_PROT_WRITE) {
6245                                         vm_object_lock(object);
6246                                 } else {
6247                                         vm_object_lock_shared(object);
6248                                 }
6249                                 m = vm_page_lookup(object, offset);
6250                                 assert(m != VM_PAGE_NULL);
6251                                 assert(VM_PAGE_WIRED(m));
6252                                 if (m != VM_PAGE_NULL && VM_PAGE_WIRED(m)) {
6253                                         *physpage_p = VM_PAGE_GET_PHYS_PAGE(m);
6254                                         if (entry->protection & VM_PROT_WRITE) {
6255                                                 vm_object_lock_assert_exclusive(
6256                                                         object);
6257                                                 m->dirty = TRUE;
6258                                         }
6259                                 } else {
6260                                         /* not already wired !? */
6261                                         *physpage_p = 0;
6262                                 }
6263                                 vm_object_unlock(object);
6264                         }
6265
6266                         /* map was not unlocked: no need to relookup */
6267                         entry = entry->vme_next;
6268                         s = entry->vme_start;
6269                         continue;
6270                 }
6271
6272                 /*
6273                  * Unwired entry or wire request transmitted via submap
6274                  */
6275
6276 #if CONFIG_EMBEDDED
6277                 /*
6278                  * Wiring would copy the pages to the shadow object.
6279                  * The shadow object would not be code-signed so
6280                  * attempting to execute code from these copied pages
6281                  * would trigger a code-signing violation.
6282                  */
6283                 if (entry->protection & VM_PROT_EXECUTE) {
6284 #if MACH_ASSERT
6285                         printf("pid %d[%s] wiring executable range from "
6286                                "0x%llx to 0x%llx: rejected to preserve "
6287                                "code-signing\n",
6288                                proc_selfpid(),
6289                                (current_task()->bsd_info
6290                                 ? proc_name_address(current_task()->bsd_info)
6291                                 : "?"),
6292                                (uint64_t) entry->vme_start,
6293                                (uint64_t) entry->vme_end);
6294 #endif /* MACH_ASSERT */
6295                         DTRACE_VM2(cs_executable_wire,
6296                                    uint64_t, (uint64_t)entry->vme_start,
6297                                    uint64_t, (uint64_t)entry->vme_end);
6298                         cs_executable_wire++;
6299                         rc = KERN_PROTECTION_FAILURE;
6300                         goto done;
6301                 }
6302 #endif /* CONFIG_EMBEDDED */
6303
6304
6305                 /*
6306                  * Perform actions of vm_map_lookup that need the write
6307                  * lock on the map: create a shadow object for a
6308                  * copy-on-write region, or an object for a zero-fill
6309                  * region.
6310                  */
6311                 size = entry->vme_end - entry->vme_start;
6312                 /*
6313                  * If wiring a copy-on-write page, we need to copy it now
6314                  * even if we're only (currently) requesting read access.
6315                  * This is aggressive, but once it's wired we can't move it.
6316                  */
6317                 if (entry->needs_copy) {
6318                         if (wire_and_extract) {
6319                                 /*
6320                                  * We're supposed to share with the original
6321                                  * provider so should not be "needs_copy"
6322                                  */
6323                                 rc = KERN_INVALID_ARGUMENT;
6324                                 goto done;
6325                         }
6326
6327                         VME_OBJECT_SHADOW(entry, size);
6328                         entry->needs_copy = FALSE;
6329                 } else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
6330                         if (wire_and_extract) {
6331                                 /*
6332                                  * We're supposed to share with the original
6333                                  * provider so should already have an object.
6334                                  */
6335                                 rc = KERN_INVALID_ARGUMENT;
6336                                 goto done;
6337                         }
6338                         VME_OBJECT_SET(entry, vm_object_allocate(size));
6339                         VME_OFFSET_SET(entry, (vm_object_offset_t)0);
6340                         assert(entry->use_pmap);
6341                 }
6342
6343                 vm_map_clip_start(map, entry, s);
6344                 vm_map_clip_end(map, entry, end);
6345
6346                 /* re-compute "e" */
6347                 e = entry->vme_end;
6348                 if (e > end)
6349                         e = end;
6350
6351                 /*
6352                  * Check for holes and protection mismatch.
6353                  * Holes: Next entry should be contiguous unless this
6354                  *        is the end of the region.
6355                  * Protection: Access requested must be allowed, unless
6356                  *      wiring is by protection class
6357                  */
6358                 if ((entry->vme_end < end) &&
6359                     ((entry->vme_next == vm_map_to_entry(map)) ||
6360                      (entry->vme_next->vme_start > entry->vme_end))) {
6361                         /* found a hole */
6362                         rc = KERN_INVALID_ADDRESS;
6363                         goto done;
6364                 }
6365                 if ((entry->protection & access_type) != access_type) {
6366                         /* found a protection problem */
6367                         rc = KERN_PROTECTION_FAILURE;
6368                         goto done;
6369                 }
6370
6371                 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
6372
6373                 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS)
6374                         goto done;
6375
6376                 entry->in_transition = TRUE;
6377
6378                 /*
6379                  * This entry might get split once we unlock the map.
6380                  * In vm_fault_wire(), we need the current range as
6381                  * defined by this entry.  In order for this to work
6382                  * along with a simultaneous clip operation, we make a
6383                  * temporary copy of this entry and use that for the
6384                  * wiring.  Note that the underlying objects do not
6385                  * change during a clip.
6386                  */
6387                 tmp_entry = *entry;
6388
6389                 /*
6390                  * The in_transition state guarentees that the entry
6391                  * (or entries for this range, if split occured) will be
6392                  * there when the map lock is acquired for the second time.
6393                  */
6394                 vm_map_unlock(map);
6395
6396                 if (!user_wire && cur_thread != THREAD_NULL)
6397                         interruptible_state = thread_interrupt_level(THREAD_UNINT);
6398                 else
6399                         interruptible_state = THREAD_UNINT;
6400
6401                 if(map_pmap)
6402                         rc = vm_fault_wire(map,
6403                                            &tmp_entry, caller_prot, tag, map_pmap, pmap_addr,
6404                                            physpage_p);
6405                 else
6406                         rc = vm_fault_wire(map,
6407                                            &tmp_entry, caller_prot, tag, map->pmap,
6408                                            tmp_entry.vme_start,
6409                                            physpage_p);
6410
6411                 if (!user_wire && cur_thread != THREAD_NULL)
6412                         thread_interrupt_level(interruptible_state);
6413
6414                 vm_map_lock(map);
6415
6416                 if (last_timestamp+1 != map->timestamp) {
6417                         /*
6418                          * Find the entry again.  It could have been clipped
6419                          * after we unlocked the map.
6420                          */
6421                         if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
6422                                                  &first_entry))
6423                                 panic("vm_map_wire: re-lookup failed");
6424
6425                         entry = first_entry;
6426                 }
6427
6428                 last_timestamp = map->timestamp;
6429
6430                 while ((entry != vm_map_to_entry(map)) &&
6431                        (entry->vme_start < tmp_entry.vme_end)) {
6432                         assert(entry->in_transition);
6433                         entry->in_transition = FALSE;
6434                         if (entry->needs_wakeup) {
6435                                 entry->needs_wakeup = FALSE;
6436                                 need_wakeup = TRUE;
6437                         }
6438                         if (rc != KERN_SUCCESS) {       /* from vm_*_wire */
6439                                 subtract_wire_counts(map, entry, user_wire);
6440                         }
6441                         entry = entry->vme_next;
6442                 }
6443
6444                 if (rc != KERN_SUCCESS) {               /* from vm_*_wire */
6445                         goto done;
6446                 }
6447
6448                 if ((entry != vm_map_to_entry(map)) && /* we still have entries in the map */
6449                     (tmp_entry.vme_end != end) &&    /* AND, we are not at the end of the requested range */
6450                     (entry->vme_start != tmp_entry.vme_end)) { /* AND, the next entry is not contiguous. */
6451                         /* found a "new" hole */
6452                         s = tmp_entry.vme_end;
6453                         rc = KERN_INVALID_ADDRESS;
6454                         goto done;
6455                 }
6456
6457                 s = entry->vme_start;
6458
6459         } /* end while loop through map entries */
6460
6461 done:
6462         if (rc == KERN_SUCCESS) {
6463                 /* repair any damage we may have made to the VM map */
6464                 vm_map_simplify_range(map, start, end);
6465         }
6466
6467         vm_map_unlock(map);
6468
6469         /*
6470          * wake up anybody waiting on entries we wired.
6471          */
6472         if (need_wakeup)
6473                 vm_map_entry_wakeup(map);
6474
6475         if (rc != KERN_SUCCESS) {
6476                 /* undo what has been wired so far */
6477                 vm_map_unwire_nested(map, start, s, user_wire,
6478                                      map_pmap, pmap_addr);
6479                 if (physpage_p) {
6480                         *physpage_p = 0;
6481                 }
6482         }
6483
6484         return rc;
6485
6486 }
6487
6488 kern_return_t
6489 vm_map_wire_external(
6490         vm_map_t                map,
6491         vm_map_offset_t         start,
6492         vm_map_offset_t         end,
6493         vm_prot_t               caller_prot,
6494         boolean_t               user_wire)
6495 {
6496         kern_return_t   kret;
6497
6498         kret = vm_map_wire_nested(map, start, end, caller_prot, vm_tag_bt(),
6499                                   user_wire, (pmap_t)NULL, 0, NULL);
6500         return kret;
6501 }
6502
6503 kern_return_t
6504 vm_map_wire_kernel(
6505         vm_map_t                map,
6506         vm_map_offset_t         start,
6507         vm_map_offset_t         end,
6508         vm_prot_t               caller_prot,
6509         vm_tag_t                tag,
6510         boolean_t               user_wire)
6511 {
6512         kern_return_t   kret;
6513
6514         kret = vm_map_wire_nested(map, start, end, caller_prot, tag,
6515                                   user_wire, (pmap_t)NULL, 0, NULL);
6516         return kret;
6517 }
6518
6519 kern_return_t
6520 vm_map_wire_and_extract_external(
6521         vm_map_t        map,
6522         vm_map_offset_t start,
6523         vm_prot_t       caller_prot,
6524         boolean_t       user_wire,
6525         ppnum_t         *physpage_p)
6526 {
6527         kern_return_t   kret;
6528
6529         kret = vm_map_wire_nested(map,
6530                                   start,
6531                                   start+VM_MAP_PAGE_SIZE(map),
6532                                   caller_prot,
6533                                   vm_tag_bt(),
6534                                   user_wire,
6535                                   (pmap_t)NULL,
6536                                   0,
6537                                   physpage_p);
6538         if (kret != KERN_SUCCESS &&
6539             physpage_p != NULL) {
6540                 *physpage_p = 0;
6541         }
6542         return kret;
6543 }
6544
6545 kern_return_t
6546 vm_map_wire_and_extract_kernel(
6547         vm_map_t        map,
6548         vm_map_offset_t start,
6549         vm_prot_t       caller_prot,
6550         vm_tag_t        tag,
6551         boolean_t       user_wire,
6552         ppnum_t         *physpage_p)
6553 {
6554         kern_return_t   kret;
6555
6556         kret = vm_map_wire_nested(map,
6557                                   start,
6558                                   start+VM_MAP_PAGE_SIZE(map),
6559                                   caller_prot,
6560                                   tag,
6561                                   user_wire,
6562                                   (pmap_t)NULL,
6563                                   0,
6564                                   physpage_p);
6565         if (kret != KERN_SUCCESS &&
6566             physpage_p != NULL) {
6567                 *physpage_p = 0;
6568         }
6569         return kret;
6570 }
6571
6572 /*
6573  *      vm_map_unwire:
6574  *
6575  *      Sets the pageability of the specified address range in the target
6576  *      as pageable.  Regions specified must have been wired previously.
6577  *
6578  *      The map must not be locked, but a reference must remain to the map
6579  *      throughout the call.
6580  *
6581  *      Kernel will panic on failures.  User unwire ignores holes and
6582  *      unwired and intransition entries to avoid losing memory by leaving
6583  *      it unwired.
6584  */
6585 static kern_return_t
6586 vm_map_unwire_nested(
6587         vm_map_t                map,
6588         vm_map_offset_t         start,
6589         vm_map_offset_t         end,
6590         boolean_t               user_wire,
6591         pmap_t                  map_pmap,
6592         vm_map_offset_t         pmap_addr)
6593 {
6594         vm_map_entry_t          entry;
6595         struct vm_map_entry     *first_entry, tmp_entry;
6596         boolean_t               need_wakeup;
6597         boolean_t               main_map = FALSE;
6598         unsigned int            last_timestamp;
6599
6600         vm_map_lock(map);
6601         if(map_pmap == NULL)
6602                 main_map = TRUE;
6603         last_timestamp = map->timestamp;
6604
6605         VM_MAP_RANGE_CHECK(map, start, end);
6606         assert(page_aligned(start));
6607         assert(page_aligned(end));
6608         assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
6609         assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
6610
6611         if (start == end) {
6612                 /* We unwired what the caller asked for: zero pages */
6613                 vm_map_unlock(map);
6614                 return KERN_SUCCESS;
6615         }
6616
6617         if (vm_map_lookup_entry(map, start, &first_entry)) {
6618                 entry = first_entry;
6619                 /*
6620                  * vm_map_clip_start will be done later.
6621                  * We don't want to unnest any nested sub maps here !
6622                  */
6623         }
6624         else {
6625                 if (!user_wire) {
6626                         panic("vm_map_unwire: start not found");
6627                 }
6628                 /*      Start address is not in map. */
6629                 vm_map_unlock(map);
6630                 return(KERN_INVALID_ADDRESS);
6631         }
6632
6633         if (entry->superpage_size) {
6634                 /* superpages are always wired */
6635                 vm_map_unlock(map);
6636                 return KERN_INVALID_ADDRESS;
6637         }
6638
6639         need_wakeup = FALSE;
6640         while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6641                 if (entry->in_transition) {
6642                         /*
6643                          * 1)
6644                          * Another thread is wiring down this entry. Note
6645                          * that if it is not for the other thread we would
6646                          * be unwiring an unwired entry.  This is not
6647                          * permitted.  If we wait, we will be unwiring memory
6648                          * we did not wire.
6649                          *
6650                          * 2)
6651                          * Another thread is unwiring this entry.  We did not
6652                          * have a reference to it, because if we did, this
6653                          * entry will not be getting unwired now.
6654                          */
6655                         if (!user_wire) {
6656                                 /*
6657                                  * XXX FBDP
6658                                  * This could happen:  there could be some
6659                                  * overlapping vslock/vsunlock operations
6660                                  * going on.
6661                                  * We should probably just wait and retry,
6662                                  * but then we have to be careful that this
6663                                  * entry could get "simplified" after
6664                                  * "in_transition" gets unset and before
6665                                  * we re-lookup the entry, so we would
6666                                  * have to re-clip the entry to avoid
6667                                  * re-unwiring what we have already unwired...
6668                                  * See vm_map_wire_nested().
6669                                  *
6670                                  * Or we could just ignore "in_transition"
6671                                  * here and proceed to decement the wired
6672                                  * count(s) on this entry.  That should be fine
6673                                  * as long as "wired_count" doesn't drop all
6674                                  * the way to 0 (and we should panic if THAT
6675                                  * happens).
6676                                  */
6677                                 panic("vm_map_unwire: in_transition entry");
6678                         }
6679
6680                         entry = entry->vme_next;
6681                         continue;
6682                 }
6683
6684                 if (entry->is_sub_map) {
6685                         vm_map_offset_t sub_start;
6686                         vm_map_offset_t sub_end;
6687                         vm_map_offset_t local_end;
6688                         pmap_t          pmap;
6689
6690                         vm_map_clip_start(map, entry, start);
6691                         vm_map_clip_end(map, entry, end);
6692
6693                         sub_start = VME_OFFSET(entry);
6694                         sub_end = entry->vme_end - entry->vme_start;
6695                         sub_end += VME_OFFSET(entry);
6696                         local_end = entry->vme_end;
6697                         if(map_pmap == NULL) {
6698                                 if(entry->use_pmap) {
6699                                         pmap = VME_SUBMAP(entry)->pmap;
6700                                         pmap_addr = sub_start;
6701                                 } else {
6702                                         pmap = map->pmap;
6703                                         pmap_addr = start;
6704                                 }
6705                                 if (entry->wired_count == 0 ||
6706                                     (user_wire && entry->user_wired_count == 0)) {
6707                                         if (!user_wire)
6708                                                 panic("vm_map_unwire: entry is unwired");
6709                                         entry = entry->vme_next;
6710                                         continue;
6711                                 }
6712
6713                                 /*
6714                                  * Check for holes
6715                                  * Holes: Next entry should be contiguous unless
6716                                  * this is the end of the region.
6717                                  */
6718                                 if (((entry->vme_end < end) &&
6719                                      ((entry->vme_next == vm_map_to_entry(map)) ||
6720                                       (entry->vme_next->vme_start
6721                                        > entry->vme_end)))) {
6722                                         if (!user_wire)
6723                                                 panic("vm_map_unwire: non-contiguous region");
6724 /*
6725                                         entry = entry->vme_next;
6726                                         continue;
6727 */
6728                                 }
6729
6730                                 subtract_wire_counts(map, entry, user_wire);
6731
6732                                 if (entry->wired_count != 0) {
6733                                         entry = entry->vme_next;
6734                                         continue;
6735                                 }
6736
6737                                 entry->in_transition = TRUE;
6738                                 tmp_entry = *entry;/* see comment in vm_map_wire() */
6739
6740                                 /*
6741                                  * We can unlock the map now. The in_transition state
6742                                  * guarantees existance of the entry.
6743                                  */
6744                                 vm_map_unlock(map);
6745                                 vm_map_unwire_nested(VME_SUBMAP(entry),
6746                                                      sub_start, sub_end, user_wire, pmap, pmap_addr);
6747                                 vm_map_lock(map);
6748
6749                                 if (last_timestamp+1 != map->timestamp) {
6750                                         /*
6751                                          * Find the entry again.  It could have been
6752                                          * clipped or deleted after we unlocked the map.
6753                                          */
6754                                         if (!vm_map_lookup_entry(map,
6755                                                                  tmp_entry.vme_start,
6756                                                                  &first_entry)) {
6757                                                 if (!user_wire)
6758                                                         panic("vm_map_unwire: re-lookup failed");
6759                                                 entry = first_entry->vme_next;
6760                                         } else
6761                                                 entry = first_entry;
6762                                 }
6763                                 last_timestamp = map->timestamp;
6764
6765                                 /*
6766                                  * clear transition bit for all constituent entries
6767                                  * that were in the original entry (saved in
6768                                  * tmp_entry).  Also check for waiters.
6769                                  */
6770                                 while ((entry != vm_map_to_entry(map)) &&
6771                                        (entry->vme_start < tmp_entry.vme_end)) {
6772                                         assert(entry->in_transition);
6773                                         entry->in_transition = FALSE;
6774                                         if (entry->needs_wakeup) {
6775                                                 entry->needs_wakeup = FALSE;
6776                                                 need_wakeup = TRUE;
6777                                         }
6778                                         entry = entry->vme_next;
6779                                 }
6780                                 continue;
6781                         } else {
6782                                 vm_map_unlock(map);
6783                                 vm_map_unwire_nested(VME_SUBMAP(entry),
6784                                                      sub_start, sub_end, user_wire, map_pmap,
6785                                                      pmap_addr);
6786                                 vm_map_lock(map);
6787
6788                                 if (last_timestamp+1 != map->timestamp) {
6789                                         /*
6790                                          * Find the entry again.  It could have been
6791                                          * clipped or deleted after we unlocked the map.
6792                                          */
6793                                         if (!vm_map_lookup_entry(map,
6794                                                                  tmp_entry.vme_start,
6795                                                                  &first_entry)) {
6796                                                 if (!user_wire)
6797                                                         panic("vm_map_unwire: re-lookup failed");
6798                                                 entry = first_entry->vme_next;
6799                                         } else
6800                                                 entry = first_entry;
6801                                 }
6802                                 last_timestamp = map->timestamp;
6803                         }
6804                 }
6805
6806
6807                 if ((entry->wired_count == 0) ||
6808                     (user_wire && entry->user_wired_count == 0)) {
6809                         if (!user_wire)
6810                                 panic("vm_map_unwire: entry is unwired");
6811
6812                         entry = entry->vme_next;
6813                         continue;
6814                 }
6815
6816                 assert(entry->wired_count > 0 &&
6817                        (!user_wire || entry->user_wired_count > 0));
6818
6819                 vm_map_clip_start(map, entry, start);
6820                 vm_map_clip_end(map, entry, end);
6821
6822                 /*
6823                  * Check for holes
6824                  * Holes: Next entry should be contiguous unless
6825                  *        this is the end of the region.
6826                  */
6827                 if (((entry->vme_end < end) &&
6828                      ((entry->vme_next == vm_map_to_entry(map)) ||
6829                       (entry->vme_next->vme_start > entry->vme_end)))) {
6830
6831                         if (!user_wire)
6832                                 panic("vm_map_unwire: non-contiguous region");
6833                         entry = entry->vme_next;
6834                         continue;
6835                 }
6836
6837                 subtract_wire_counts(map, entry, user_wire);
6838
6839                 if (entry->wired_count != 0) {
6840                         entry = entry->vme_next;
6841                         continue;
6842                 }
6843
6844                 if(entry->zero_wired_pages) {
6845                         entry->zero_wired_pages = FALSE;
6846                 }
6847
6848                 entry->in_transition = TRUE;
6849                 tmp_entry = *entry;     /* see comment in vm_map_wire() */
6850
6851                 /*
6852                  * We can unlock the map now. The in_transition state
6853                  * guarantees existance of the entry.
6854                  */
6855                 vm_map_unlock(map);
6856                 if(map_pmap) {
6857                         vm_fault_unwire(map,
6858                                         &tmp_entry, FALSE, map_pmap, pmap_addr);
6859                 } else {
6860                         vm_fault_unwire(map,
6861                                         &tmp_entry, FALSE, map->pmap,
6862                                         tmp_entry.vme_start);
6863                 }
6864                 vm_map_lock(map);
6865
6866                 if (last_timestamp+1 != map->timestamp) {
6867                         /*
6868                          * Find the entry again.  It could have been clipped
6869                          * or deleted after we unlocked the map.
6870                          */
6871                         if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
6872                                                  &first_entry)) {
6873                                 if (!user_wire)
6874                                         panic("vm_map_unwire: re-lookup failed");
6875                                 entry = first_entry->vme_next;
6876                         } else
6877                                 entry = first_entry;
6878                 }
6879                 last_timestamp = map->timestamp;
6880
6881                 /*
6882                  * clear transition bit for all constituent entries that
6883                  * were in the original entry (saved in tmp_entry).  Also
6884                  * check for waiters.
6885                  */
6886                 while ((entry != vm_map_to_entry(map)) &&
6887                        (entry->vme_start < tmp_entry.vme_end)) {
6888                         assert(entry->in_transition);
6889                         entry->in_transition = FALSE;
6890                         if (entry->needs_wakeup) {
6891                                 entry->needs_wakeup = FALSE;
6892                                 need_wakeup = TRUE;
6893                         }
6894                         entry = entry->vme_next;
6895                 }
6896         }
6897
6898         /*
6899          * We might have fragmented the address space when we wired this
6900          * range of addresses.  Attempt to re-coalesce these VM map entries
6901          * with their neighbors now that they're no longer wired.
6902          * Under some circumstances, address space fragmentation can
6903          * prevent VM object shadow chain collapsing, which can cause
6904          * swap space leaks.
6905          */
6906         vm_map_simplify_range(map, start, end);
6907
6908         vm_map_unlock(map);
6909         /*
6910          * wake up anybody waiting on entries that we have unwired.
6911          */
6912         if (need_wakeup)
6913                 vm_map_entry_wakeup(map);
6914         return(KERN_SUCCESS);
6915
6916 }
6917
6918 kern_return_t
6919 vm_map_unwire(
6920         vm_map_t                map,
6921         vm_map_offset_t         start,
6922         vm_map_offset_t         end,
6923         boolean_t               user_wire)
6924 {
6925         return vm_map_unwire_nested(map, start, end,
6926                                     user_wire, (pmap_t)NULL, 0);
6927 }
6928
6929
6930 /*
6931  *      vm_map_entry_delete:    [ internal use only ]
6932  *
6933  *      Deallocate the given entry from the target map.
6934  */
6935 static void
6936 vm_map_entry_delete(
6937         vm_map_t        map,
6938         vm_map_entry_t  entry)
6939 {
6940         vm_map_offset_t s, e;
6941         vm_object_t     object;
6942         vm_map_t        submap;
6943
6944         s = entry->vme_start;
6945         e = entry->vme_end;
6946         assert(page_aligned(s));
6947         assert(page_aligned(e));
6948         if (entry->map_aligned == TRUE) {
6949                 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
6950                 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
6951         }
6952         assert(entry->wired_count == 0);
6953         assert(entry->user_wired_count == 0);
6954         assert(!entry->permanent);
6955
6956         if (entry->is_sub_map) {
6957                 object = NULL;
6958                 submap = VME_SUBMAP(entry);
6959         } else {
6960                 submap = NULL;
6961                 object = VME_OBJECT(entry);
6962         }
6963
6964         vm_map_store_entry_unlink(map, entry);
6965         map->size -= e - s;
6966
6967         vm_map_entry_dispose(map, entry);
6968
6969         vm_map_unlock(map);
6970         /*
6971          *      Deallocate the object only after removing all
6972          *      pmap entries pointing to its pages.
6973          */
6974         if (submap)
6975                 vm_map_deallocate(submap);
6976         else
6977                 vm_object_deallocate(object);
6978
6979 }
6980
6981 void
6982 vm_map_submap_pmap_clean(
6983         vm_map_t        map,
6984         vm_map_offset_t start,
6985         vm_map_offset_t end,
6986         vm_map_t        sub_map,
6987         vm_map_offset_t offset)
6988 {
6989         vm_map_offset_t submap_start;
6990         vm_map_offset_t submap_end;
6991         vm_map_size_t   remove_size;
6992         vm_map_entry_t  entry;
6993
6994         submap_end = offset + (end - start);
6995         submap_start = offset;
6996
6997         vm_map_lock_read(sub_map);
6998         if(vm_map_lookup_entry(sub_map, offset, &entry)) {
6999
7000                 remove_size = (entry->vme_end - entry->vme_start);
7001                 if(offset > entry->vme_start)
7002                         remove_size -= offset - entry->vme_start;
7003
7004
7005                 if(submap_end < entry->vme_end) {
7006                         remove_size -=
7007                                 entry->vme_end - submap_end;
7008                 }
7009                 if(entry->is_sub_map) {
7010                         vm_map_submap_pmap_clean(
7011                                 sub_map,
7012                                 start,
7013                                 start + remove_size,
7014                                 VME_SUBMAP(entry),
7015                                 VME_OFFSET(entry));
7016                 } else {
7017
7018                         if((map->mapped_in_other_pmaps) && (map->ref_count)
7019                            && (VME_OBJECT(entry) != NULL)) {
7020                                 vm_object_pmap_protect_options(
7021                                         VME_OBJECT(entry),
7022                                         (VME_OFFSET(entry) +
7023                                          offset -
7024                                          entry->vme_start),
7025                                         remove_size,
7026                                         PMAP_NULL,
7027                                         entry->vme_start,
7028                                         VM_PROT_NONE,
7029                                         PMAP_OPTIONS_REMOVE);
7030                         } else {
7031                                 pmap_remove(map->pmap,
7032                                             (addr64_t)start,
7033                                             (addr64_t)(start + remove_size));
7034                         }
7035                 }
7036         }
7037
7038         entry = entry->vme_next;
7039
7040         while((entry != vm_map_to_entry(sub_map))
7041               && (entry->vme_start < submap_end)) {
7042                 remove_size = (entry->vme_end - entry->vme_start);
7043                 if(submap_end < entry->vme_end) {
7044                         remove_size -= entry->vme_end - submap_end;
7045                 }
7046                 if(entry->is_sub_map) {
7047                         vm_map_submap_pmap_clean(
7048                                 sub_map,
7049                                 (start + entry->vme_start) - offset,
7050                                 ((start + entry->vme_start) - offset) + remove_size,
7051                                 VME_SUBMAP(entry),
7052                                 VME_OFFSET(entry));
7053                 } else {
7054                         if((map->mapped_in_other_pmaps) && (map->ref_count)
7055                            && (VME_OBJECT(entry) != NULL)) {
7056                                 vm_object_pmap_protect_options(
7057                                         VME_OBJECT(entry),
7058                                         VME_OFFSET(entry),
7059                                         remove_size,
7060                                         PMAP_NULL,
7061                                         entry->vme_start,
7062                                         VM_PROT_NONE,
7063                                         PMAP_OPTIONS_REMOVE);
7064                         } else {
7065                                 pmap_remove(map->pmap,
7066                                             (addr64_t)((start + entry->vme_start)
7067                                                        - offset),
7068                                             (addr64_t)(((start + entry->vme_start)
7069                                                         - offset) + remove_size));
7070                         }
7071                 }
7072                 entry = entry->vme_next;
7073         }
7074         vm_map_unlock_read(sub_map);
7075         return;
7076 }
7077
7078 /*
7079  *      vm_map_delete:  [ internal use only ]
7080  *
7081  *      Deallocates the given address range from the target map.
7082  *      Removes all user wirings. Unwires one kernel wiring if
7083  *      VM_MAP_REMOVE_KUNWIRE is set.  Waits for kernel wirings to go
7084  *      away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set.  Sleeps
7085  *      interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
7086  *
7087  *      This routine is called with map locked and leaves map locked.
7088  */
7089 static kern_return_t
7090 vm_map_delete(
7091         vm_map_t                map,
7092         vm_map_offset_t         start,
7093         vm_map_offset_t         end,
7094         int                     flags,
7095         vm_map_t                zap_map)
7096 {
7097         vm_map_entry_t          entry, next;
7098         struct   vm_map_entry   *first_entry, tmp_entry;
7099         vm_map_offset_t         s;
7100         vm_object_t             object;
7101         boolean_t               need_wakeup;
7102         unsigned int            last_timestamp = ~0; /* unlikely value */
7103         int                     interruptible;
7104
7105         interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
7106                 THREAD_ABORTSAFE : THREAD_UNINT;
7107
7108         /*
7109          * All our DMA I/O operations in IOKit are currently done by
7110          * wiring through the map entries of the task requesting the I/O.
7111          * Because of this, we must always wait for kernel wirings
7112          * to go away on the entries before deleting them.
7113          *
7114          * Any caller who wants to actually remove a kernel wiring
7115          * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
7116          * properly remove one wiring instead of blasting through
7117          * them all.
7118          */
7119         flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
7120
7121         while(1) {
7122                 /*
7123                  *      Find the start of the region, and clip it
7124                  */
7125                 if (vm_map_lookup_entry(map, start, &first_entry)) {
7126                         entry = first_entry;
7127                         if (map == kalloc_map &&
7128                             (entry->vme_start != start ||
7129                              entry->vme_end != end)) {
7130                                 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7131                                       "mismatched entry %p [0x%llx:0x%llx]\n",
7132                                       map,
7133                                       (uint64_t)start,
7134                                       (uint64_t)end,
7135                                       entry,
7136                                       (uint64_t)entry->vme_start,
7137                                       (uint64_t)entry->vme_end);
7138                         }
7139                         if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */                           start = SUPERPAGE_ROUND_DOWN(start);
7140                                 start = SUPERPAGE_ROUND_DOWN(start);
7141                                 continue;
7142                         }
7143                         if (start == entry->vme_start) {
7144                                 /*
7145                                  * No need to clip.  We don't want to cause
7146                                  * any unnecessary unnesting in this case...
7147                                  */
7148                         } else {
7149                                 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7150                                     entry->map_aligned &&
7151                                     !VM_MAP_PAGE_ALIGNED(
7152                                             start,
7153                                             VM_MAP_PAGE_MASK(map))) {
7154                                         /*
7155                                          * The entry will no longer be
7156                                          * map-aligned after clipping
7157                                          * and the caller said it's OK.
7158                                          */
7159                                         entry->map_aligned = FALSE;
7160                                 }
7161                                 if (map == kalloc_map) {
7162                                         panic("vm_map_delete(%p,0x%llx,0x%llx):"
7163                                               " clipping %p at 0x%llx\n",
7164                                               map,
7165                                               (uint64_t)start,
7166                                               (uint64_t)end,
7167                                               entry,
7168                                               (uint64_t)start);
7169                                 }
7170                                 vm_map_clip_start(map, entry, start);
7171                         }
7172
7173                         /*
7174                          *      Fix the lookup hint now, rather than each
7175                          *      time through the loop.
7176                          */
7177                         SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7178                 } else {
7179                         if (map->pmap == kernel_pmap &&
7180                             map->ref_count != 0) {
7181                                 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7182                                       "no map entry at 0x%llx\n",
7183                                       map,
7184                                       (uint64_t)start,
7185                                       (uint64_t)end,
7186                                       (uint64_t)start);
7187                         }
7188                         entry = first_entry->vme_next;
7189                 }
7190                 break;
7191         }
7192         if (entry->superpage_size)
7193                 end = SUPERPAGE_ROUND_UP(end);
7194
7195         need_wakeup = FALSE;
7196         /*
7197          *      Step through all entries in this region
7198          */
7199         s = entry->vme_start;
7200         while ((entry != vm_map_to_entry(map)) && (s < end)) {
7201                 /*
7202                  * At this point, we have deleted all the memory entries
7203                  * between "start" and "s".  We still need to delete
7204                  * all memory entries between "s" and "end".
7205                  * While we were blocked and the map was unlocked, some
7206                  * new memory entries could have been re-allocated between
7207                  * "start" and "s" and we don't want to mess with those.
7208                  * Some of those entries could even have been re-assembled
7209                  * with an entry after "s" (in vm_map_simplify_entry()), so
7210                  * we may have to vm_map_clip_start() again.
7211                  */
7212
7213                 if (entry->vme_start >= s) {
7214                         /*
7215                          * This entry starts on or after "s"
7216                          * so no need to clip its start.
7217                          */
7218                 } else {
7219                         /*
7220                          * This entry has been re-assembled by a
7221                          * vm_map_simplify_entry().  We need to
7222                          * re-clip its start.
7223                          */
7224                         if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7225                             entry->map_aligned &&
7226                             !VM_MAP_PAGE_ALIGNED(s,
7227                                                  VM_MAP_PAGE_MASK(map))) {
7228                                 /*
7229                                  * The entry will no longer be map-aligned
7230                                  * after clipping and the caller said it's OK.
7231                                  */
7232                                 entry->map_aligned = FALSE;
7233                         }
7234                         if (map == kalloc_map) {
7235                                 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7236                                       "clipping %p at 0x%llx\n",
7237                                       map,
7238                                       (uint64_t)start,
7239                                       (uint64_t)end,
7240                                       entry,
7241                                       (uint64_t)s);
7242                         }
7243                         vm_map_clip_start(map, entry, s);
7244                 }
7245                 if (entry->vme_end <= end) {
7246                         /*
7247                          * This entry is going away completely, so no need
7248                          * to clip and possibly cause an unnecessary unnesting.
7249                          */
7250                 } else {
7251                         if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
7252                             entry->map_aligned &&
7253                             !VM_MAP_PAGE_ALIGNED(end,
7254                                                  VM_MAP_PAGE_MASK(map))) {
7255                                 /*
7256                                  * The entry will no longer be map-aligned
7257                                  * after clipping and the caller said it's OK.
7258                                  */
7259                                 entry->map_aligned = FALSE;
7260                         }
7261                         if (map == kalloc_map) {
7262                                 panic("vm_map_delete(%p,0x%llx,0x%llx): "
7263                                       "clipping %p at 0x%llx\n",
7264                                       map,
7265                                       (uint64_t)start,
7266                                       (uint64_t)end,
7267                                       entry,
7268                                       (uint64_t)end);
7269                         }
7270                         vm_map_clip_end(map, entry, end);
7271                 }
7272
7273                 if (entry->permanent) {
7274                         if (map->pmap == kernel_pmap) {
7275                                 panic("%s(%p,0x%llx,0x%llx): "
7276                                       "attempt to remove permanent "
7277                                       "VM map entry "
7278                                       "%p [0x%llx:0x%llx]\n",
7279                                       __FUNCTION__,
7280                                       map,
7281                                       (uint64_t) start,
7282                                       (uint64_t) end,
7283                                       entry,
7284                                       (uint64_t) entry->vme_start,
7285                                       (uint64_t) entry->vme_end);
7286                         } else if (flags & VM_MAP_REMOVE_IMMUTABLE) {
7287 //                              printf("FBDP %d[%s] removing permanent entry %p [0x%llx:0x%llx] prot 0x%x/0x%x\n", proc_selfpid(), (current_task()->bsd_info ? proc_name_address(current_task()->bsd_info) : "?"), entry, (uint64_t)entry->vme_start, (uint64_t)entry->vme_end, entry->protection, entry->max_protection);
7288                                 entry->permanent = FALSE;
7289                         } else {
7290                                 if (!vm_map_executable_immutable_no_log) {
7291                                         printf("%d[%s] %s(0x%llx,0x%llx): "
7292                                                    "permanent entry [0x%llx:0x%llx] "
7293                                                    "prot 0x%x/0x%x\n",
7294                                                    proc_selfpid(),
7295                                                    (current_task()->bsd_info
7296                                                         ? proc_name_address(current_task()->bsd_info)
7297                                                         : "?"),
7298                                                    __FUNCTION__,
7299                                                    (uint64_t) start,
7300                                                    (uint64_t) end,
7301                                                    (uint64_t)entry->vme_start,
7302                                                    (uint64_t)entry->vme_end,
7303                                                    entry->protection,
7304                                                    entry->max_protection);
7305                                 }
7306                                 /*
7307                                  * dtrace -n 'vm_map_delete_permanent { print("start=0x%llx end=0x%llx prot=0x%x/0x%x\n", arg0, arg1, arg2, arg3); stack(); ustack(); }'
7308                                  */
7309                                 DTRACE_VM5(vm_map_delete_permanent,
7310                                            vm_map_offset_t, entry->vme_start,
7311                                            vm_map_offset_t, entry->vme_end,
7312                                            vm_prot_t, entry->protection,
7313                                            vm_prot_t, entry->max_protection,
7314                                            int, VME_ALIAS(entry));
7315                         }
7316                 }
7317
7318
7319                 if (entry->in_transition) {
7320                         wait_result_t wait_result;
7321
7322                         /*
7323                          * Another thread is wiring/unwiring this entry.
7324                          * Let the other thread know we are waiting.
7325                          */
7326                         assert(s == entry->vme_start);
7327                         entry->needs_wakeup = TRUE;
7328
7329                         /*
7330                          * wake up anybody waiting on entries that we have
7331                          * already unwired/deleted.
7332                          */
7333                         if (need_wakeup) {
7334                                 vm_map_entry_wakeup(map);
7335                                 need_wakeup = FALSE;
7336                         }
7337
7338                         wait_result = vm_map_entry_wait(map, interruptible);
7339
7340                         if (interruptible &&
7341                             wait_result == THREAD_INTERRUPTED) {
7342                                 /*
7343                                  * We do not clear the needs_wakeup flag,
7344                                  * since we cannot tell if we were the only one.
7345                                  */
7346                                 return KERN_ABORTED;
7347                         }
7348
7349                         /*
7350                          * The entry could have been clipped or it
7351                          * may not exist anymore.  Look it up again.
7352                          */
7353                         if (!vm_map_lookup_entry(map, s, &first_entry)) {
7354                                 /*
7355                                  * User: use the next entry
7356                                  */
7357                                 entry = first_entry->vme_next;
7358                                 s = entry->vme_start;
7359                         } else {
7360                                 entry = first_entry;
7361                                 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7362                         }
7363                         last_timestamp = map->timestamp;
7364                         continue;
7365                 } /* end in_transition */
7366
7367                 if (entry->wired_count) {
7368                         boolean_t       user_wire;
7369
7370                         user_wire = entry->user_wired_count > 0;
7371
7372                         /*
7373                          *      Remove a kernel wiring if requested
7374                          */
7375                         if (flags & VM_MAP_REMOVE_KUNWIRE) {
7376                                 entry->wired_count--;
7377                         }
7378
7379                         /*
7380                          *      Remove all user wirings for proper accounting
7381                          */
7382                         if (entry->user_wired_count > 0) {
7383                                 while (entry->user_wired_count)
7384                                         subtract_wire_counts(map, entry, user_wire);
7385                         }
7386
7387                         if (entry->wired_count != 0) {
7388                                 assert(map != kernel_map);
7389                                 /*
7390                                  * Cannot continue.  Typical case is when
7391                                  * a user thread has physical io pending on
7392                                  * this page.  Either wait for the
7393                                  * kernel wiring to go away or return an
7394                                  * error.
7395                                  */
7396                                 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
7397                                         wait_result_t wait_result;
7398
7399                                         assert(s == entry->vme_start);
7400                                         entry->needs_wakeup = TRUE;
7401                                         wait_result = vm_map_entry_wait(map,
7402                                                                         interruptible);
7403
7404                                         if (interruptible &&
7405                                             wait_result == THREAD_INTERRUPTED) {
7406                                                 /*
7407                                                  * We do not clear the
7408                                                  * needs_wakeup flag, since we
7409                                                  * cannot tell if we were the
7410                                                  * only one.
7411                                                  */
7412                                                 return KERN_ABORTED;
7413                                         }
7414
7415                                         /*
7416                                          * The entry could have been clipped or
7417                                          * it may not exist anymore.  Look it
7418                                          * up again.
7419                                          */
7420                                         if (!vm_map_lookup_entry(map, s,
7421                                                                  &first_entry)) {
7422                                                 assert(map != kernel_map);
7423                                                 /*
7424                                                  * User: use the next entry
7425                                                  */
7426                                                 entry = first_entry->vme_next;
7427                                                 s = entry->vme_start;
7428                                         } else {
7429                                                 entry = first_entry;
7430                                                 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7431                                         }
7432                                         last_timestamp = map->timestamp;
7433                                         continue;
7434                                 }
7435                                 else {
7436                                         return KERN_FAILURE;
7437                                 }
7438                         }
7439
7440                         entry->in_transition = TRUE;
7441                         /*
7442                          * copy current entry.  see comment in vm_map_wire()
7443                          */
7444                         tmp_entry = *entry;
7445                         assert(s == entry->vme_start);
7446
7447                         /*
7448                          * We can unlock the map now. The in_transition
7449                          * state guarantees existence of the entry.
7450                          */
7451                         vm_map_unlock(map);
7452
7453                         if (tmp_entry.is_sub_map) {
7454                                 vm_map_t sub_map;
7455                                 vm_map_offset_t sub_start, sub_end;
7456                                 pmap_t pmap;
7457                                 vm_map_offset_t pmap_addr;
7458
7459
7460                                 sub_map = VME_SUBMAP(&tmp_entry);
7461                                 sub_start = VME_OFFSET(&tmp_entry);
7462                                 sub_end = sub_start + (tmp_entry.vme_end -
7463                                                        tmp_entry.vme_start);
7464                                 if (tmp_entry.use_pmap) {
7465                                         pmap = sub_map->pmap;
7466                                         pmap_addr = tmp_entry.vme_start;
7467                                 } else {
7468                                         pmap = map->pmap;
7469                                         pmap_addr = tmp_entry.vme_start;
7470                                 }
7471                                 (void) vm_map_unwire_nested(sub_map,
7472                                                             sub_start, sub_end,
7473                                                             user_wire,
7474                                                             pmap, pmap_addr);
7475                         } else {
7476
7477                                 if (VME_OBJECT(&tmp_entry) == kernel_object) {
7478                                         pmap_protect_options(
7479                                                 map->pmap,
7480                                                 tmp_entry.vme_start,
7481                                                 tmp_entry.vme_end,
7482                                                 VM_PROT_NONE,
7483                                                 PMAP_OPTIONS_REMOVE,
7484                                                 NULL);
7485                                 }
7486                                 vm_fault_unwire(map, &tmp_entry,
7487                                                 VME_OBJECT(&tmp_entry) == kernel_object,
7488                                                 map->pmap, tmp_entry.vme_start);
7489                         }
7490
7491                         vm_map_lock(map);
7492
7493                         if (last_timestamp+1 != map->timestamp) {
7494                                 /*
7495                                  * Find the entry again.  It could have
7496                                  * been clipped after we unlocked the map.
7497                                  */
7498                                 if (!vm_map_lookup_entry(map, s, &first_entry)){
7499                                         assert((map != kernel_map) &&
7500                                                (!entry->is_sub_map));
7501                                         first_entry = first_entry->vme_next;
7502                                         s = first_entry->vme_start;
7503                                 } else {
7504                                         SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7505                                 }
7506                         } else {
7507                                 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7508                                 first_entry = entry;
7509                         }
7510
7511                         last_timestamp = map->timestamp;
7512
7513                         entry = first_entry;
7514                         while ((entry != vm_map_to_entry(map)) &&
7515                                (entry->vme_start < tmp_entry.vme_end)) {
7516                                 assert(entry->in_transition);
7517                                 entry->in_transition = FALSE;
7518                                 if (entry->needs_wakeup) {
7519                                         entry->needs_wakeup = FALSE;
7520                                         need_wakeup = TRUE;
7521                                 }
7522                                 entry = entry->vme_next;
7523                         }
7524                         /*
7525                          * We have unwired the entry(s).  Go back and
7526                          * delete them.
7527                          */
7528                         entry = first_entry;
7529                         continue;
7530                 }
7531
7532                 /* entry is unwired */
7533                 assert(entry->wired_count == 0);
7534                 assert(entry->user_wired_count == 0);
7535
7536                 assert(s == entry->vme_start);
7537
7538                 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
7539                         /*
7540                          * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
7541                          * vm_map_delete(), some map entries might have been
7542                          * transferred to a "zap_map", which doesn't have a
7543                          * pmap.  The original pmap has already been flushed
7544                          * in the vm_map_delete() call targeting the original
7545                          * map, but when we get to destroying the "zap_map",
7546                          * we don't have any pmap to flush, so let's just skip
7547                          * all this.
7548                          */
7549                 } else if (entry->is_sub_map) {
7550                         if (entry->use_pmap) {
7551 #ifndef NO_NESTED_PMAP
7552                                 int pmap_flags;
7553
7554                                 if (flags & VM_MAP_REMOVE_NO_UNNESTING) {
7555                                         /*
7556                                          * This is the final cleanup of the
7557                                          * address space being terminated.
7558                                          * No new mappings are expected and
7559                                          * we don't really need to unnest the
7560                                          * shared region (and lose the "global"
7561                                          * pmap mappings, if applicable).
7562                                          *
7563                                          * Tell the pmap layer that we're
7564                                          * "clean" wrt nesting.
7565                                          */
7566                                         pmap_flags = PMAP_UNNEST_CLEAN;
7567                                 } else {
7568                                         /*
7569                                          * We're unmapping part of the nested
7570                                          * shared region, so we can't keep the
7571                                          * nested pmap.
7572                                          */
7573                                         pmap_flags = 0;
7574                                 }
7575                                 pmap_unnest_options(
7576                                         map->pmap,
7577                                         (addr64_t)entry->vme_start,
7578                                         entry->vme_end - entry->vme_start,
7579                                         pmap_flags);
7580 #endif  /* NO_NESTED_PMAP */
7581                                 if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
7582                                         /* clean up parent map/maps */
7583                                         vm_map_submap_pmap_clean(
7584                                                 map, entry->vme_start,
7585                                                 entry->vme_end,
7586                                                 VME_SUBMAP(entry),
7587                                                 VME_OFFSET(entry));
7588                                 }
7589                         } else {
7590                                 vm_map_submap_pmap_clean(
7591                                         map, entry->vme_start, entry->vme_end,
7592                                         VME_SUBMAP(entry),
7593                                         VME_OFFSET(entry));
7594                         }
7595                 } else if (VME_OBJECT(entry) != kernel_object &&
7596                            VME_OBJECT(entry) != compressor_object) {
7597                         object = VME_OBJECT(entry);
7598                         if ((map->mapped_in_other_pmaps) && (map->ref_count)) {
7599                                 vm_object_pmap_protect_options(
7600                                         object, VME_OFFSET(entry),
7601                                         entry->vme_end - entry->vme_start,
7602                                         PMAP_NULL,
7603                                         entry->vme_start,
7604                                         VM_PROT_NONE,
7605                                         PMAP_OPTIONS_REMOVE);
7606                         } else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
7607                                    (map->pmap == kernel_pmap)) {
7608                                 /* Remove translations associated
7609                                  * with this range unless the entry
7610                                  * does not have an object, or
7611                                  * it's the kernel map or a descendant
7612                                  * since the platform could potentially
7613                                  * create "backdoor" mappings invisible
7614                                  * to the VM. It is expected that
7615                                  * objectless, non-kernel ranges
7616                                  * do not have such VM invisible
7617                                  * translations.
7618                                  */
7619                                 pmap_remove_options(map->pmap,
7620                                                     (addr64_t)entry->vme_start,
7621                                                     (addr64_t)entry->vme_end,
7622                                                     PMAP_OPTIONS_REMOVE);
7623                         }
7624                 }
7625
7626                 if (entry->iokit_acct) {
7627                         /* alternate accounting */
7628                         DTRACE_VM4(vm_map_iokit_unmapped_region,
7629                                    vm_map_t, map,
7630                                    vm_map_offset_t, entry->vme_start,
7631                                    vm_map_offset_t, entry->vme_end,
7632                                    int, VME_ALIAS(entry));
7633                         vm_map_iokit_unmapped_region(map,
7634                                                      (entry->vme_end -
7635                                                       entry->vme_start));
7636                         entry->iokit_acct = FALSE;
7637                 }
7638
7639                 /*
7640                  * All pmap mappings for this map entry must have been
7641                  * cleared by now.
7642                  */
7643 #if DEBUG
7644                 assert(vm_map_pmap_is_empty(map,
7645                                             entry->vme_start,
7646                                             entry->vme_end));
7647 #endif /* DEBUG */
7648
7649                 next = entry->vme_next;
7650
7651                 if (map->pmap == kernel_pmap &&
7652                     map->ref_count != 0 &&
7653                     entry->vme_end < end &&
7654                     (next == vm_map_to_entry(map) ||
7655                      next->vme_start != entry->vme_end)) {
7656                         panic("vm_map_delete(%p,0x%llx,0x%llx): "
7657                               "hole after %p at 0x%llx\n",
7658                               map,
7659                               (uint64_t)start,
7660                               (uint64_t)end,
7661                               entry,
7662                               (uint64_t)entry->vme_end);
7663                 }
7664
7665                 s = next->vme_start;
7666                 last_timestamp = map->timestamp;
7667
7668                 if (entry->permanent) {
7669                         /*
7670                          * A permanent entry can not be removed, so leave it
7671                          * in place but remove all access permissions.
7672                          */
7673                         entry->protection = VM_PROT_NONE;
7674                         entry->max_protection = VM_PROT_NONE;
7675                 } else if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
7676                            zap_map != VM_MAP_NULL) {
7677                         vm_map_size_t entry_size;
7678                         /*
7679                          * The caller wants to save the affected VM map entries
7680                          * into the "zap_map".  The caller will take care of
7681                          * these entries.
7682                          */
7683                         /* unlink the entry from "map" ... */
7684                         vm_map_store_entry_unlink(map, entry);
7685                         /* ... and add it to the end of the "zap_map" */
7686                         vm_map_store_entry_link(zap_map,
7687                                           vm_map_last_entry(zap_map),
7688                                           entry);
7689                         entry_size = entry->vme_end - entry->vme_start;
7690                         map->size -= entry_size;
7691                         zap_map->size += entry_size;
7692                         /* we didn't unlock the map, so no timestamp increase */
7693                         last_timestamp--;
7694                 } else {
7695                         vm_map_entry_delete(map, entry);
7696                         /* vm_map_entry_delete unlocks the map */
7697                         vm_map_lock(map);
7698                 }
7699
7700                 entry = next;
7701
7702                 if(entry == vm_map_to_entry(map)) {
7703                         break;
7704                 }
7705                 if (last_timestamp+1 != map->timestamp) {
7706                         /*
7707                          * we are responsible for deleting everything
7708                          * from the given space, if someone has interfered
7709                          * we pick up where we left off, back fills should
7710                          * be all right for anyone except map_delete and
7711                          * we have to assume that the task has been fully
7712                          * disabled before we get here
7713                          */
7714                         if (!vm_map_lookup_entry(map, s, &entry)){
7715                                 entry = entry->vme_next;
7716                                 s = entry->vme_start;
7717                         } else {
7718                                 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
7719                         }
7720                         /*
7721                          * others can not only allocate behind us, we can
7722                          * also see coalesce while we don't have the map lock
7723                          */
7724                         if(entry == vm_map_to_entry(map)) {
7725                                 break;
7726                         }
7727                 }
7728                 last_timestamp = map->timestamp;
7729         }
7730
7731         if (map->wait_for_space)
7732                 thread_wakeup((event_t) map);
7733         /*
7734          * wake up anybody waiting on entries that we have already deleted.
7735          */
7736         if (need_wakeup)
7737                 vm_map_entry_wakeup(map);
7738
7739         return KERN_SUCCESS;
7740 }
7741
7742 /*
7743  *      vm_map_remove:
7744  *
7745  *      Remove the given address range from the target map.
7746  *      This is the exported form of vm_map_delete.
7747  */
7748 kern_return_t
7749 vm_map_remove(
7750         vm_map_t        map,
7751         vm_map_offset_t start,
7752         vm_map_offset_t end,
7753          boolean_t      flags)
7754 {
7755         kern_return_t   result;
7756
7757         vm_map_lock(map);
7758         VM_MAP_RANGE_CHECK(map, start, end);
7759         /*
7760          * For the zone_map, the kernel controls the allocation/freeing of memory.
7761          * Any free to the zone_map should be within the bounds of the map and
7762          * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
7763          * free to the zone_map into a no-op, there is a problem and we should
7764          * panic.
7765          */
7766         if ((map == zone_map) && (start == end))
7767                 panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start);
7768         result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
7769         vm_map_unlock(map);
7770
7771         return(result);
7772 }
7773
7774 /*
7775  *      vm_map_remove_locked:
7776  *
7777  *      Remove the given address range from the target locked map.
7778  *      This is the exported form of vm_map_delete.
7779  */
7780 kern_return_t
7781 vm_map_remove_locked(
7782         vm_map_t        map,
7783         vm_map_offset_t start,
7784         vm_map_offset_t end,
7785         boolean_t       flags)
7786 {
7787         kern_return_t   result;
7788
7789         VM_MAP_RANGE_CHECK(map, start, end);
7790         result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
7791         return(result);
7792 }
7793
7794
7795 /*
7796  *      Routine:        vm_map_copy_discard
7797  *
7798  *      Description:
7799  *              Dispose of a map copy object (returned by
7800  *              vm_map_copyin).
7801  */
7802 void
7803 vm_map_copy_discard(
7804         vm_map_copy_t   copy)
7805 {
7806         if (copy == VM_MAP_COPY_NULL)
7807                 return;
7808
7809         switch (copy->type) {
7810         case VM_MAP_COPY_ENTRY_LIST:
7811                 while (vm_map_copy_first_entry(copy) !=
7812                        vm_map_copy_to_entry(copy)) {
7813                         vm_map_entry_t  entry = vm_map_copy_first_entry(copy);
7814
7815                         vm_map_copy_entry_unlink(copy, entry);
7816                         if (entry->is_sub_map) {
7817                                 vm_map_deallocate(VME_SUBMAP(entry));
7818                         } else {
7819                                 vm_object_deallocate(VME_OBJECT(entry));
7820                         }
7821                         vm_map_copy_entry_dispose(copy, entry);
7822                 }
7823                 break;
7824         case VM_MAP_COPY_OBJECT:
7825                 vm_object_deallocate(copy->cpy_object);
7826                 break;
7827         case VM_MAP_COPY_KERNEL_BUFFER:
7828
7829                 /*
7830                  * The vm_map_copy_t and possibly the data buffer were
7831                  * allocated by a single call to kalloc(), i.e. the
7832                  * vm_map_copy_t was not allocated out of the zone.
7833                  */
7834                 if (copy->size > msg_ool_size_small || copy->offset)
7835                         panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
7836                               (long long)copy->size, (long long)copy->offset);
7837                 kfree(copy, copy->size + cpy_kdata_hdr_sz);
7838                 return;
7839         }
7840         zfree(vm_map_copy_zone, copy);
7841 }
7842
7843 /*
7844  *      Routine:        vm_map_copy_copy
7845  *
7846  *      Description:
7847  *                      Move the information in a map copy object to
7848  *                      a new map copy object, leaving the old one
7849  *                      empty.
7850  *
7851  *                      This is used by kernel routines that need
7852  *                      to look at out-of-line data (in copyin form)
7853  *                      before deciding whether to return SUCCESS.
7854  *                      If the routine returns FAILURE, the original
7855  *                      copy object will be deallocated; therefore,
7856  *                      these routines must make a copy of the copy
7857  *                      object and leave the original empty so that
7858  *                      deallocation will not fail.
7859  */
7860 vm_map_copy_t
7861 vm_map_copy_copy(
7862         vm_map_copy_t   copy)
7863 {
7864         vm_map_copy_t   new_copy;
7865
7866         if (copy == VM_MAP_COPY_NULL)
7867                 return VM_MAP_COPY_NULL;
7868
7869         /*
7870          * Allocate a new copy object, and copy the information
7871          * from the old one into it.
7872          */
7873
7874         new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
7875         new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
7876         *new_copy = *copy;
7877
7878         if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
7879                 /*
7880                  * The links in the entry chain must be
7881                  * changed to point to the new copy object.
7882                  */
7883                 vm_map_copy_first_entry(copy)->vme_prev
7884                         = vm_map_copy_to_entry(new_copy);
7885                 vm_map_copy_last_entry(copy)->vme_next
7886                         = vm_map_copy_to_entry(new_copy);
7887         }
7888
7889         /*
7890          * Change the old copy object into one that contains
7891          * nothing to be deallocated.
7892          */
7893         copy->type = VM_MAP_COPY_OBJECT;
7894         copy->cpy_object = VM_OBJECT_NULL;
7895
7896         /*
7897          * Return the new object.
7898          */
7899         return new_copy;
7900 }
7901
7902 static kern_return_t
7903 vm_map_overwrite_submap_recurse(
7904         vm_map_t        dst_map,
7905         vm_map_offset_t dst_addr,
7906         vm_map_size_t   dst_size)
7907 {
7908         vm_map_offset_t dst_end;
7909         vm_map_entry_t  tmp_entry;
7910         vm_map_entry_t  entry;
7911         kern_return_t   result;
7912         boolean_t       encountered_sub_map = FALSE;
7913
7914
7915
7916         /*
7917          *      Verify that the destination is all writeable
7918          *      initially.  We have to trunc the destination
7919          *      address and round the copy size or we'll end up
7920          *      splitting entries in strange ways.
7921          */
7922
7923         dst_end = vm_map_round_page(dst_addr + dst_size,
7924                                     VM_MAP_PAGE_MASK(dst_map));
7925         vm_map_lock(dst_map);
7926
7927 start_pass_1:
7928         if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
7929                 vm_map_unlock(dst_map);
7930                 return(KERN_INVALID_ADDRESS);
7931         }
7932
7933         vm_map_clip_start(dst_map,
7934                           tmp_entry,
7935                           vm_map_trunc_page(dst_addr,
7936                                             VM_MAP_PAGE_MASK(dst_map)));
7937         if (tmp_entry->is_sub_map) {
7938                 /* clipping did unnest if needed */
7939                 assert(!tmp_entry->use_pmap);
7940         }
7941
7942         for (entry = tmp_entry;;) {
7943                 vm_map_entry_t  next;
7944
7945                 next = entry->vme_next;
7946                 while(entry->is_sub_map) {
7947                         vm_map_offset_t sub_start;
7948                         vm_map_offset_t sub_end;
7949                         vm_map_offset_t local_end;
7950
7951                         if (entry->in_transition) {
7952                                 /*
7953                                  * Say that we are waiting, and wait for entry.
7954                                  */
7955                                 entry->needs_wakeup = TRUE;
7956                                 vm_map_entry_wait(dst_map, THREAD_UNINT);
7957
7958                                 goto start_pass_1;
7959                         }
7960
7961                         encountered_sub_map = TRUE;
7962                         sub_start = VME_OFFSET(entry);
7963
7964                         if(entry->vme_end < dst_end)
7965                                 sub_end = entry->vme_end;
7966                         else
7967                                 sub_end = dst_end;
7968                         sub_end -= entry->vme_start;
7969                         sub_end += VME_OFFSET(entry);
7970                         local_end = entry->vme_end;
7971                         vm_map_unlock(dst_map);
7972
7973                         result = vm_map_overwrite_submap_recurse(
7974                                 VME_SUBMAP(entry),
7975                                 sub_start,
7976                                 sub_end - sub_start);
7977
7978                         if(result != KERN_SUCCESS)
7979                                 return result;
7980                         if (dst_end <= entry->vme_end)
7981                                 return KERN_SUCCESS;
7982                         vm_map_lock(dst_map);
7983                         if(!vm_map_lookup_entry(dst_map, local_end,
7984                                                 &tmp_entry)) {
7985                                 vm_map_unlock(dst_map);
7986                                 return(KERN_INVALID_ADDRESS);
7987                         }
7988                         entry = tmp_entry;
7989                         next = entry->vme_next;
7990                 }
7991
7992                 if ( ! (entry->protection & VM_PROT_WRITE)) {
7993                         vm_map_unlock(dst_map);
7994                         return(KERN_PROTECTION_FAILURE);
7995                 }
7996
7997                 /*
7998                  *      If the entry is in transition, we must wait
7999                  *      for it to exit that state.  Anything could happen
8000                  *      when we unlock the map, so start over.
8001                  */
8002                 if (entry->in_transition) {
8003
8004                         /*
8005                          * Say that we are waiting, and wait for entry.
8006                          */
8007                         entry->needs_wakeup = TRUE;
8008                         vm_map_entry_wait(dst_map, THREAD_UNINT);
8009
8010                         goto start_pass_1;
8011                 }
8012
8013 /*
8014  *              our range is contained completely within this map entry
8015  */
8016                 if (dst_end <= entry->vme_end) {
8017                         vm_map_unlock(dst_map);
8018                         return KERN_SUCCESS;
8019                 }
8020 /*
8021  *              check that range specified is contiguous region
8022  */
8023                 if ((next == vm_map_to_entry(dst_map)) ||
8024                     (next->vme_start != entry->vme_end)) {
8025                         vm_map_unlock(dst_map);
8026                         return(KERN_INVALID_ADDRESS);
8027                 }
8028
8029                 /*
8030                  *      Check for permanent objects in the destination.
8031                  */
8032                 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
8033                     ((!VME_OBJECT(entry)->internal) ||
8034                      (VME_OBJECT(entry)->true_share))) {
8035                         if(encountered_sub_map) {
8036                                 vm_map_unlock(dst_map);
8037                                 return(KERN_FAILURE);
8038                         }
8039                 }
8040
8041
8042                 entry = next;
8043         }/* for */
8044         vm_map_unlock(dst_map);
8045         return(KERN_SUCCESS);
8046 }
8047
8048 /*
8049  *      Routine:        vm_map_copy_overwrite
8050  *
8051  *      Description:
8052  *              Copy the memory described by the map copy
8053  *              object (copy; returned by vm_map_copyin) onto
8054  *              the specified destination region (dst_map, dst_addr).
8055  *              The destination must be writeable.
8056  *
8057  *              Unlike vm_map_copyout, this routine actually
8058  *              writes over previously-mapped memory.  If the
8059  *              previous mapping was to a permanent (user-supplied)
8060  *              memory object, it is preserved.
8061  *
8062  *              The attributes (protection and inheritance) of the
8063  *              destination region are preserved.
8064  *
8065  *              If successful, consumes the copy object.
8066  *              Otherwise, the caller is responsible for it.
8067  *
8068  *      Implementation notes:
8069  *              To overwrite aligned temporary virtual memory, it is
8070  *              sufficient to remove the previous mapping and insert
8071  *              the new copy.  This replacement is done either on
8072  *              the whole region (if no permanent virtual memory
8073  *              objects are embedded in the destination region) or
8074  *              in individual map entries.
8075  *
8076  *              To overwrite permanent virtual memory, it is necessary
8077  *              to copy each page, as the external memory management
8078  *              interface currently does not provide any optimizations.
8079  *
8080  *              Unaligned memory also has to be copied.  It is possible
8081  *              to use 'vm_trickery' to copy the aligned data.  This is
8082  *              not done but not hard to implement.
8083  *
8084  *              Once a page of permanent memory has been overwritten,
8085  *              it is impossible to interrupt this function; otherwise,
8086  *              the call would be neither atomic nor location-independent.
8087  *              The kernel-state portion of a user thread must be
8088  *              interruptible.
8089  *
8090  *              It may be expensive to forward all requests that might
8091  *              overwrite permanent memory (vm_write, vm_copy) to
8092  *              uninterruptible kernel threads.  This routine may be
8093  *              called by interruptible threads; however, success is
8094  *              not guaranteed -- if the request cannot be performed
8095  *              atomically and interruptibly, an error indication is
8096  *              returned.
8097  */
8098
8099 static kern_return_t
8100 vm_map_copy_overwrite_nested(
8101         vm_map_t                dst_map,
8102         vm_map_address_t        dst_addr,
8103         vm_map_copy_t           copy,
8104         boolean_t               interruptible,
8105         pmap_t                  pmap,
8106         boolean_t               discard_on_success)
8107 {
8108         vm_map_offset_t         dst_end;
8109         vm_map_entry_t          tmp_entry;
8110         vm_map_entry_t          entry;
8111         kern_return_t           kr;
8112         boolean_t               aligned = TRUE;
8113         boolean_t               contains_permanent_objects = FALSE;
8114         boolean_t               encountered_sub_map = FALSE;
8115         vm_map_offset_t         base_addr;
8116         vm_map_size_t           copy_size;
8117         vm_map_size_t           total_size;
8118
8119
8120         /*
8121          *      Check for null copy object.
8122          */
8123
8124         if (copy == VM_MAP_COPY_NULL)
8125                 return(KERN_SUCCESS);
8126
8127         /*
8128          *      Check for special kernel buffer allocated
8129          *      by new_ipc_kmsg_copyin.
8130          */
8131
8132         if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
8133                 return(vm_map_copyout_kernel_buffer(
8134                                dst_map, &dst_addr,
8135                                copy, copy->size, TRUE, discard_on_success));
8136         }
8137
8138         /*
8139          *      Only works for entry lists at the moment.  Will
8140          *      support page lists later.
8141          */
8142
8143         assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
8144
8145         if (copy->size == 0) {
8146                 if (discard_on_success)
8147                         vm_map_copy_discard(copy);
8148                 return(KERN_SUCCESS);
8149         }
8150
8151         /*
8152          *      Verify that the destination is all writeable
8153          *      initially.  We have to trunc the destination
8154          *      address and round the copy size or we'll end up
8155          *      splitting entries in strange ways.
8156          */
8157
8158         if (!VM_MAP_PAGE_ALIGNED(copy->size,
8159                                  VM_MAP_PAGE_MASK(dst_map)) ||
8160             !VM_MAP_PAGE_ALIGNED(copy->offset,
8161                                  VM_MAP_PAGE_MASK(dst_map)) ||
8162             !VM_MAP_PAGE_ALIGNED(dst_addr,
8163                                  VM_MAP_PAGE_MASK(dst_map)))
8164         {
8165                 aligned = FALSE;
8166                 dst_end = vm_map_round_page(dst_addr + copy->size,
8167                                             VM_MAP_PAGE_MASK(dst_map));
8168         } else {
8169                 dst_end = dst_addr + copy->size;
8170         }
8171
8172         vm_map_lock(dst_map);
8173
8174         /* LP64todo - remove this check when vm_map_commpage64()
8175          * no longer has to stuff in a map_entry for the commpage
8176          * above the map's max_offset.
8177          */
8178         if (dst_addr >= dst_map->max_offset) {
8179                 vm_map_unlock(dst_map);
8180                 return(KERN_INVALID_ADDRESS);
8181         }
8182
8183 start_pass_1:
8184         if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
8185                 vm_map_unlock(dst_map);
8186                 return(KERN_INVALID_ADDRESS);
8187         }
8188         vm_map_clip_start(dst_map,
8189                           tmp_entry,
8190                           vm_map_trunc_page(dst_addr,
8191                                             VM_MAP_PAGE_MASK(dst_map)));
8192         for (entry = tmp_entry;;) {
8193                 vm_map_entry_t  next = entry->vme_next;
8194
8195                 while(entry->is_sub_map) {
8196                         vm_map_offset_t sub_start;
8197                         vm_map_offset_t sub_end;
8198                         vm_map_offset_t local_end;
8199
8200                         if (entry->in_transition) {
8201
8202                                 /*
8203                                  * Say that we are waiting, and wait for entry.
8204                                  */
8205                                 entry->needs_wakeup = TRUE;
8206                                 vm_map_entry_wait(dst_map, THREAD_UNINT);
8207
8208                                 goto start_pass_1;
8209                         }
8210
8211                         local_end = entry->vme_end;
8212                         if (!(entry->needs_copy)) {
8213                                 /* if needs_copy we are a COW submap */
8214                                 /* in such a case we just replace so */
8215                                 /* there is no need for the follow-  */
8216                                 /* ing check.                        */
8217                                 encountered_sub_map = TRUE;
8218                                 sub_start = VME_OFFSET(entry);
8219
8220                                 if(entry->vme_end < dst_end)
8221                                         sub_end = entry->vme_end;
8222                                 else
8223                                         sub_end = dst_end;
8224                                 sub_end -= entry->vme_start;
8225                                 sub_end += VME_OFFSET(entry);
8226                                 vm_map_unlock(dst_map);
8227
8228                                 kr = vm_map_overwrite_submap_recurse(
8229                                         VME_SUBMAP(entry),
8230                                         sub_start,
8231                                         sub_end - sub_start);
8232                                 if(kr != KERN_SUCCESS)
8233                                         return kr;
8234                                 vm_map_lock(dst_map);
8235                         }
8236
8237                         if (dst_end <= entry->vme_end)
8238                                 goto start_overwrite;
8239                         if(!vm_map_lookup_entry(dst_map, local_end,
8240                                                 &entry)) {
8241                                 vm_map_unlock(dst_map);
8242                                 return(KERN_INVALID_ADDRESS);
8243                         }
8244                         next = entry->vme_next;
8245                 }
8246
8247                 if ( ! (entry->protection & VM_PROT_WRITE)) {
8248                         vm_map_unlock(dst_map);
8249                         return(KERN_PROTECTION_FAILURE);
8250                 }
8251
8252                 /*
8253                  *      If the entry is in transition, we must wait
8254                  *      for it to exit that state.  Anything could happen
8255                  *      when we unlock the map, so start over.
8256                  */
8257                 if (entry->in_transition) {
8258
8259                         /*
8260                          * Say that we are waiting, and wait for entry.
8261                          */
8262                         entry->needs_wakeup = TRUE;
8263                         vm_map_entry_wait(dst_map, THREAD_UNINT);
8264
8265                         goto start_pass_1;
8266                 }
8267
8268 /*
8269  *              our range is contained completely within this map entry
8270  */
8271                 if (dst_end <= entry->vme_end)
8272                         break;
8273 /*
8274  *              check that range specified is contiguous region
8275  */
8276                 if ((next == vm_map_to_entry(dst_map)) ||
8277                     (next->vme_start != entry->vme_end)) {
8278                         vm_map_unlock(dst_map);
8279                         return(KERN_INVALID_ADDRESS);
8280                 }
8281
8282
8283                 /*
8284                  *      Check for permanent objects in the destination.
8285                  */
8286                 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
8287                     ((!VME_OBJECT(entry)->internal) ||
8288                      (VME_OBJECT(entry)->true_share))) {
8289                         contains_permanent_objects = TRUE;
8290                 }
8291
8292                 entry = next;
8293         }/* for */
8294
8295 start_overwrite:
8296         /*
8297          *      If there are permanent objects in the destination, then
8298          *      the copy cannot be interrupted.
8299          */
8300
8301         if (interruptible && contains_permanent_objects) {
8302                 vm_map_unlock(dst_map);
8303                 return(KERN_FAILURE);   /* XXX */
8304         }
8305
8306         /*
8307          *
8308          *      Make a second pass, overwriting the data
8309          *      At the beginning of each loop iteration,
8310          *      the next entry to be overwritten is "tmp_entry"
8311          *      (initially, the value returned from the lookup above),
8312          *      and the starting address expected in that entry
8313          *      is "start".
8314          */
8315
8316         total_size = copy->size;
8317         if(encountered_sub_map) {
8318                 copy_size = 0;
8319                 /* re-calculate tmp_entry since we've had the map */
8320                 /* unlocked */
8321                 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
8322                         vm_map_unlock(dst_map);
8323                         return(KERN_INVALID_ADDRESS);
8324                 }
8325         } else {
8326                 copy_size = copy->size;
8327         }
8328
8329         base_addr = dst_addr;
8330         while(TRUE) {
8331                 /* deconstruct the copy object and do in parts */
8332                 /* only in sub_map, interruptible case */
8333                 vm_map_entry_t  copy_entry;
8334                 vm_map_entry_t  previous_prev = VM_MAP_ENTRY_NULL;
8335                 vm_map_entry_t  next_copy = VM_MAP_ENTRY_NULL;
8336                 int             nentries;
8337                 int             remaining_entries = 0;
8338                 vm_map_offset_t new_offset = 0;
8339
8340                 for (entry = tmp_entry; copy_size == 0;) {
8341                         vm_map_entry_t  next;
8342
8343                         next = entry->vme_next;
8344
8345                         /* tmp_entry and base address are moved along */
8346                         /* each time we encounter a sub-map.  Otherwise */
8347                         /* entry can outpace tmp_entry, and the copy_size */
8348                         /* may reflect the distance between them. */
8349                         /* If the current entry is found to be in transition, */
8350                         /* we will start over at the beginning or at the last */
8351                         /* encounter of a submap, as dictated by base_addr, */
8352                         /* and we will zero copy_size accordingly. */
8353                         if (entry->in_transition) {
8354                                 /*
8355                                  * Say that we are waiting, and wait for entry.
8356                                  */
8357                                 entry->needs_wakeup = TRUE;
8358                                 vm_map_entry_wait(dst_map, THREAD_UNINT);
8359
8360                                 if(!vm_map_lookup_entry(dst_map, base_addr,
8361                                                         &tmp_entry)) {
8362                                         vm_map_unlock(dst_map);
8363                                         return(KERN_INVALID_ADDRESS);
8364                                 }
8365                                 copy_size = 0;
8366                                 entry = tmp_entry;
8367                                 continue;
8368                         }
8369                         if (entry->is_sub_map) {
8370                                 vm_map_offset_t sub_start;
8371                                 vm_map_offset_t sub_end;
8372                                 vm_map_offset_t local_end;
8373
8374                                 if (entry->needs_copy) {
8375                                         /* if this is a COW submap */
8376                                         /* just back the range with an */
8377                                         /* anonymous entry */
8378                                         if(entry->vme_end < dst_end)
8379                                                 sub_end = entry->vme_end;
8380                                         else
8381                                                 sub_end = dst_end;
8382                                         if(entry->vme_start < base_addr)
8383                                                 sub_start = base_addr;
8384                                         else
8385                                                 sub_start = entry->vme_start;
8386                                         vm_map_clip_end(
8387                                                 dst_map, entry, sub_end);
8388                                         vm_map_clip_start(
8389                                                 dst_map, entry, sub_start);
8390                                         assert(!entry->use_pmap);
8391                                         entry->is_sub_map = FALSE;
8392                                         vm_map_deallocate(
8393                                                 VME_SUBMAP(entry));
8394                                         VME_OBJECT_SET(entry, NULL);
8395                                         VME_OFFSET_SET(entry, 0);
8396                                         entry->is_shared = FALSE;
8397                                         entry->needs_copy = FALSE;
8398                                         entry->protection = VM_PROT_DEFAULT;
8399                                         entry->max_protection = VM_PROT_ALL;
8400                                         entry->wired_count = 0;
8401                                         entry->user_wired_count = 0;
8402                                         if(entry->inheritance
8403                                            == VM_INHERIT_SHARE)
8404                                                 entry->inheritance = VM_INHERIT_COPY;
8405                                         continue;
8406                                 }
8407                                 /* first take care of any non-sub_map */
8408                                 /* entries to send */
8409                                 if(base_addr < entry->vme_start) {
8410                                         /* stuff to send */
8411                                         copy_size =
8412                                                 entry->vme_start - base_addr;
8413                                         break;
8414                                 }
8415                                 sub_start = VME_OFFSET(entry);
8416
8417                                 if(entry->vme_end < dst_end)
8418                                         sub_end = entry->vme_end;
8419                                 else
8420                                         sub_end = dst_end;
8421                                 sub_end -= entry->vme_start;
8422                                 sub_end += VME_OFFSET(entry);
8423                                 local_end = entry->vme_end;
8424                                 vm_map_unlock(dst_map);
8425                                 copy_size = sub_end - sub_start;
8426
8427                                 /* adjust the copy object */
8428                                 if (total_size > copy_size) {
8429                                         vm_map_size_t   local_size = 0;
8430                                         vm_map_size_t   entry_size;
8431
8432                                         nentries = 1;
8433                                         new_offset = copy->offset;
8434                                         copy_entry = vm_map_copy_first_entry(copy);
8435                                         while(copy_entry !=
8436                                               vm_map_copy_to_entry(copy)){
8437                                                 entry_size = copy_entry->vme_end -
8438                                                         copy_entry->vme_start;
8439                                                 if((local_size < copy_size) &&
8440                                                    ((local_size + entry_size)
8441                                                     >= copy_size)) {
8442                                                         vm_map_copy_clip_end(copy,
8443                                                                              copy_entry,
8444                                                                              copy_entry->vme_start +
8445                                                                              (copy_size - local_size));
8446                                                         entry_size = copy_entry->vme_end -
8447                                                                 copy_entry->vme_start;
8448                                                         local_size += entry_size;
8449                                                         new_offset += entry_size;
8450                                                 }
8451                                                 if(local_size >= copy_size) {
8452                                                         next_copy = copy_entry->vme_next;
8453                                                         copy_entry->vme_next =
8454                                                                 vm_map_copy_to_entry(copy);
8455                                                         previous_prev =
8456                                                                 copy->cpy_hdr.links.prev;
8457                                                         copy->cpy_hdr.links.prev = copy_entry;
8458                                                         copy->size = copy_size;
8459                                                         remaining_entries =
8460                                                                 copy->cpy_hdr.nentries;
8461                                                         remaining_entries -= nentries;
8462                                                         copy->cpy_hdr.nentries = nentries;
8463                                                         break;
8464                                                 } else {
8465                                                         local_size += entry_size;
8466                                                         new_offset += entry_size;
8467                                                         nentries++;
8468                                                 }
8469                                                 copy_entry = copy_entry->vme_next;
8470                                         }
8471                                 }
8472
8473                                 if((entry->use_pmap) && (pmap == NULL)) {
8474                                         kr = vm_map_copy_overwrite_nested(
8475                                                 VME_SUBMAP(entry),
8476                                                 sub_start,
8477                                                 copy,
8478                                                 interruptible,
8479                                                 VME_SUBMAP(entry)->pmap,
8480                                                 TRUE);
8481                                 } else if (pmap != NULL) {
8482                                         kr = vm_map_copy_overwrite_nested(
8483                                                 VME_SUBMAP(entry),
8484                                                 sub_start,
8485                                                 copy,
8486                                                 interruptible, pmap,
8487                                                 TRUE);
8488                                 } else {
8489                                         kr = vm_map_copy_overwrite_nested(
8490                                                 VME_SUBMAP(entry),
8491                                                 sub_start,
8492                                                 copy,
8493                                                 interruptible,
8494                                                 dst_map->pmap,
8495                                                 TRUE);
8496                                 }
8497                                 if(kr != KERN_SUCCESS) {
8498                                         if(next_copy != NULL) {
8499                                                 copy->cpy_hdr.nentries +=
8500                                                         remaining_entries;
8501                                                 copy->cpy_hdr.links.prev->vme_next =
8502                                                         next_copy;
8503                                                 copy->cpy_hdr.links.prev
8504                                                         = previous_prev;
8505                                                 copy->size = total_size;
8506                                         }
8507                                         return kr;
8508                                 }
8509                                 if (dst_end <= local_end) {
8510                                         return(KERN_SUCCESS);
8511                                 }
8512                                 /* otherwise copy no longer exists, it was */
8513                                 /* destroyed after successful copy_overwrite */
8514                                 copy = (vm_map_copy_t)
8515                                         zalloc(vm_map_copy_zone);
8516                                 copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
8517                                 vm_map_copy_first_entry(copy) =
8518                                         vm_map_copy_last_entry(copy) =
8519                                         vm_map_copy_to_entry(copy);
8520                                 copy->type = VM_MAP_COPY_ENTRY_LIST;
8521                                 copy->offset = new_offset;
8522
8523                                 /*
8524                                  * XXX FBDP
8525                                  * this does not seem to deal with
8526                                  * the VM map store (R&B tree)
8527                                  */
8528
8529                                 total_size -= copy_size;
8530                                 copy_size = 0;
8531                                 /* put back remainder of copy in container */
8532                                 if(next_copy != NULL) {
8533                                         copy->cpy_hdr.nentries = remaining_entries;
8534                                         copy->cpy_hdr.links.next = next_copy;
8535                                         copy->cpy_hdr.links.prev = previous_prev;
8536                                         copy->size = total_size;
8537                                         next_copy->vme_prev =
8538                                                 vm_map_copy_to_entry(copy);
8539                                         next_copy = NULL;
8540                                 }
8541                                 base_addr = local_end;
8542                                 vm_map_lock(dst_map);
8543                                 if(!vm_map_lookup_entry(dst_map,
8544                                                         local_end, &tmp_entry)) {
8545                                         vm_map_unlock(dst_map);
8546                                         return(KERN_INVALID_ADDRESS);
8547                                 }
8548                                 entry = tmp_entry;
8549                                 continue;
8550                         }
8551                         if (dst_end <= entry->vme_end) {
8552                                 copy_size = dst_end - base_addr;
8553                                 break;
8554                         }
8555
8556                         if ((next == vm_map_to_entry(dst_map)) ||
8557                             (next->vme_start != entry->vme_end)) {
8558                                 vm_map_unlock(dst_map);
8559                                 return(KERN_INVALID_ADDRESS);
8560                         }
8561
8562                         entry = next;
8563                 }/* for */
8564
8565                 next_copy = NULL;
8566                 nentries = 1;
8567
8568                 /* adjust the copy object */
8569                 if (total_size > copy_size) {
8570                         vm_map_size_t   local_size = 0;
8571                         vm_map_size_t   entry_size;
8572
8573                         new_offset = copy->offset;
8574                         copy_entry = vm_map_copy_first_entry(copy);
8575                         while(copy_entry != vm_map_copy_to_entry(copy)) {
8576                                 entry_size = copy_entry->vme_end -
8577                                         copy_entry->vme_start;
8578                                 if((local_size < copy_size) &&
8579                                    ((local_size + entry_size)
8580                                     >= copy_size)) {
8581                                         vm_map_copy_clip_end(copy, copy_entry,
8582                                                              copy_entry->vme_start +
8583                                                              (copy_size - local_size));
8584                                         entry_size = copy_entry->vme_end -
8585                                                 copy_entry->vme_start;
8586                                         local_size += entry_size;
8587                                         new_offset += entry_size;
8588                                 }
8589                                 if(local_size >= copy_size) {
8590                                         next_copy = copy_entry->vme_next;
8591                                         copy_entry->vme_next =
8592                                                 vm_map_copy_to_entry(copy);
8593                                         previous_prev =
8594                                                 copy->cpy_hdr.links.prev;
8595                                         copy->cpy_hdr.links.prev = copy_entry;
8596                                         copy->size = copy_size;
8597                                         remaining_entries =
8598                                                 copy->cpy_hdr.nentries;
8599                                         remaining_entries -= nentries;
8600                                         copy->cpy_hdr.nentries = nentries;
8601                                         break;
8602                                 } else {
8603                                         local_size += entry_size;
8604                                         new_offset += entry_size;
8605                                         nentries++;
8606                                 }
8607                                 copy_entry = copy_entry->vme_next;
8608                         }
8609                 }
8610
8611                 if (aligned) {
8612                         pmap_t  local_pmap;
8613
8614                         if(pmap)
8615                                 local_pmap = pmap;
8616                         else
8617                                 local_pmap = dst_map->pmap;
8618
8619                         if ((kr =  vm_map_copy_overwrite_aligned(
8620                                      dst_map, tmp_entry, copy,
8621                                      base_addr, local_pmap)) != KERN_SUCCESS) {
8622                                 if(next_copy != NULL) {
8623                                         copy->cpy_hdr.nentries +=
8624                                                 remaining_entries;
8625                                         copy->cpy_hdr.links.prev->vme_next =
8626                                                 next_copy;
8627                                         copy->cpy_hdr.links.prev =
8628                                                 previous_prev;
8629                                         copy->size += copy_size;
8630                                 }
8631                                 return kr;
8632                         }
8633                         vm_map_unlock(dst_map);
8634                 } else {
8635                         /*
8636                          * Performance gain:
8637                          *
8638                          * if the copy and dst address are misaligned but the same
8639                          * offset within the page we can copy_not_aligned the
8640                          * misaligned parts and copy aligned the rest.  If they are
8641                          * aligned but len is unaligned we simply need to copy
8642                          * the end bit unaligned.  We'll need to split the misaligned
8643                          * bits of the region in this case !
8644                          */
8645                         /* ALWAYS UNLOCKS THE dst_map MAP */
8646                         kr = vm_map_copy_overwrite_unaligned(
8647                                 dst_map,
8648                                 tmp_entry,
8649                                 copy,
8650                                 base_addr,
8651                                 discard_on_success);
8652                         if (kr != KERN_SUCCESS) {
8653                                 if(next_copy != NULL) {
8654                                         copy->cpy_hdr.nentries +=
8655                                                 remaining_entries;
8656                                         copy->cpy_hdr.links.prev->vme_next =
8657                                                 next_copy;
8658                                         copy->cpy_hdr.links.prev =
8659                                                 previous_prev;
8660                                         copy->size += copy_size;
8661                                 }
8662                                 return kr;
8663                         }
8664                 }
8665                 total_size -= copy_size;
8666                 if(total_size == 0)
8667                         break;
8668                 base_addr += copy_size;
8669                 copy_size = 0;
8670                 copy->offset = new_offset;
8671                 if(next_copy != NULL) {
8672                         copy->cpy_hdr.nentries = remaining_entries;
8673                         copy->cpy_hdr.links.next = next_copy;
8674                         copy->cpy_hdr.links.prev = previous_prev;
8675                         next_copy->vme_prev = vm_map_copy_to_entry(copy);
8676                         copy->size = total_size;
8677                 }
8678                 vm_map_lock(dst_map);
8679                 while(TRUE) {
8680                         if (!vm_map_lookup_entry(dst_map,
8681                                                  base_addr, &tmp_entry)) {
8682                                 vm_map_unlock(dst_map);
8683                                 return(KERN_INVALID_ADDRESS);
8684                         }
8685                         if (tmp_entry->in_transition) {
8686                                 entry->needs_wakeup = TRUE;
8687                                 vm_map_entry_wait(dst_map, THREAD_UNINT);
8688                         } else {
8689                                 break;
8690                         }
8691                 }
8692                 vm_map_clip_start(dst_map,
8693                                   tmp_entry,
8694                                   vm_map_trunc_page(base_addr,
8695                                                     VM_MAP_PAGE_MASK(dst_map)));
8696
8697                 entry = tmp_entry;
8698         } /* while */
8699
8700         /*
8701          *      Throw away the vm_map_copy object
8702          */
8703         if (discard_on_success)
8704                 vm_map_copy_discard(copy);
8705
8706         return(KERN_SUCCESS);
8707 }/* vm_map_copy_overwrite_nested */
8708
8709 kern_return_t
8710 vm_map_copy_overwrite(
8711         vm_map_t        dst_map,
8712         vm_map_offset_t dst_addr,
8713         vm_map_copy_t   copy,
8714         boolean_t       interruptible)
8715 {
8716         vm_map_size_t   head_size, tail_size;
8717         vm_map_copy_t   head_copy, tail_copy;
8718         vm_map_offset_t head_addr, tail_addr;
8719         vm_map_entry_t  entry;
8720         kern_return_t   kr;
8721         vm_map_offset_t effective_page_mask, effective_page_size;
8722
8723         head_size = 0;
8724         tail_size = 0;
8725         head_copy = NULL;
8726         tail_copy = NULL;
8727         head_addr = 0;
8728         tail_addr = 0;
8729
8730         if (interruptible ||
8731             copy == VM_MAP_COPY_NULL ||
8732             copy->type != VM_MAP_COPY_ENTRY_LIST) {
8733                 /*
8734                  * We can't split the "copy" map if we're interruptible
8735                  * or if we don't have a "copy" map...
8736                  */
8737         blunt_copy:
8738                 return vm_map_copy_overwrite_nested(dst_map,
8739                                                     dst_addr,
8740                                                     copy,
8741                                                     interruptible,
8742                                                     (pmap_t) NULL,
8743                                                     TRUE);
8744         }
8745
8746         effective_page_mask = MAX(VM_MAP_PAGE_MASK(dst_map), PAGE_MASK);
8747         effective_page_mask = MAX(VM_MAP_COPY_PAGE_MASK(copy),
8748                                   effective_page_mask);
8749         effective_page_size = effective_page_mask + 1;
8750
8751         if (copy->size < 3 * effective_page_size) {
8752                 /*
8753                  * Too small to bother with optimizing...
8754                  */
8755                 goto blunt_copy;
8756         }
8757
8758         if ((dst_addr & effective_page_mask) !=
8759             (copy->offset & effective_page_mask)) {
8760                 /*
8761                  * Incompatible mis-alignment of source and destination...
8762                  */
8763                 goto blunt_copy;
8764         }
8765
8766         /*
8767          * Proper alignment or identical mis-alignment at the beginning.
8768          * Let's try and do a small unaligned copy first (if needed)
8769          * and then an aligned copy for the rest.
8770          */
8771         if (!vm_map_page_aligned(dst_addr, effective_page_mask)) {
8772                 head_addr = dst_addr;
8773                 head_size = (effective_page_size -
8774                              (copy->offset & effective_page_mask));
8775                 head_size = MIN(head_size, copy->size);
8776         }
8777         if (!vm_map_page_aligned(copy->offset + copy->size,
8778                                   effective_page_mask)) {
8779                 /*
8780                  * Mis-alignment at the end.
8781                  * Do an aligned copy up to the last page and
8782                  * then an unaligned copy for the remaining bytes.
8783                  */
8784                 tail_size = ((copy->offset + copy->size) &
8785                              effective_page_mask);
8786                 tail_size = MIN(tail_size, copy->size);
8787                 tail_addr = dst_addr + copy->size - tail_size;
8788                 assert(tail_addr >= head_addr + head_size);
8789         }
8790         assert(head_size + tail_size <= copy->size);
8791
8792         if (head_size + tail_size == copy->size) {
8793                 /*
8794                  * It's all unaligned, no optimization possible...
8795                  */
8796                 goto blunt_copy;
8797         }
8798
8799         /*
8800          * Can't optimize if there are any submaps in the
8801          * destination due to the way we free the "copy" map
8802          * progressively in vm_map_copy_overwrite_nested()
8803          * in that case.
8804          */
8805         vm_map_lock_read(dst_map);
8806         if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
8807                 vm_map_unlock_read(dst_map);
8808                 goto blunt_copy;
8809         }
8810         for (;
8811              (entry != vm_map_copy_to_entry(copy) &&
8812               entry->vme_start < dst_addr + copy->size);
8813              entry = entry->vme_next) {
8814                 if (entry->is_sub_map) {
8815                         vm_map_unlock_read(dst_map);
8816                         goto blunt_copy;
8817                 }
8818         }
8819         vm_map_unlock_read(dst_map);
8820
8821         if (head_size) {
8822                 /*
8823                  * Unaligned copy of the first "head_size" bytes, to reach
8824                  * a page boundary.
8825                  */
8826
8827                 /*
8828                  * Extract "head_copy" out of "copy".
8829                  */
8830                 head_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8831                 head_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
8832                 vm_map_copy_first_entry(head_copy) =
8833                         vm_map_copy_to_entry(head_copy);
8834                 vm_map_copy_last_entry(head_copy) =
8835                         vm_map_copy_to_entry(head_copy);
8836                 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
8837                 head_copy->cpy_hdr.nentries = 0;
8838                 head_copy->cpy_hdr.entries_pageable =
8839                         copy->cpy_hdr.entries_pageable;
8840                 vm_map_store_init(&head_copy->cpy_hdr);
8841
8842                 entry = vm_map_copy_first_entry(copy);
8843                 if (entry->vme_end < copy->offset + head_size) {
8844                         head_size = entry->vme_end - copy->offset;
8845                 }
8846
8847                 head_copy->offset = copy->offset;
8848                 head_copy->size = head_size;
8849                 copy->offset += head_size;
8850                 copy->size -= head_size;
8851
8852                 vm_map_copy_clip_end(copy, entry, copy->offset);
8853                 vm_map_copy_entry_unlink(copy, entry);
8854                 vm_map_copy_entry_link(head_copy,
8855                                        vm_map_copy_to_entry(head_copy),
8856                                        entry);
8857
8858                 /*
8859                  * Do the unaligned copy.
8860                  */
8861                 kr = vm_map_copy_overwrite_nested(dst_map,
8862                                                   head_addr,
8863                                                   head_copy,
8864                                                   interruptible,
8865                                                   (pmap_t) NULL,
8866                                                   FALSE);
8867                 if (kr != KERN_SUCCESS)
8868                         goto done;
8869         }
8870
8871         if (tail_size) {
8872                 /*
8873                  * Extract "tail_copy" out of "copy".
8874                  */
8875                 tail_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8876                 tail_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
8877                 vm_map_copy_first_entry(tail_copy) =
8878                         vm_map_copy_to_entry(tail_copy);
8879                 vm_map_copy_last_entry(tail_copy) =
8880                         vm_map_copy_to_entry(tail_copy);
8881                 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
8882                 tail_copy->cpy_hdr.nentries = 0;
8883                 tail_copy->cpy_hdr.entries_pageable =
8884                         copy->cpy_hdr.entries_pageable;
8885                 vm_map_store_init(&tail_copy->cpy_hdr);
8886
8887                 tail_copy->offset = copy->offset + copy->size - tail_size;
8888                 tail_copy->size = tail_size;
8889
8890                 copy->size -= tail_size;
8891
8892                 entry = vm_map_copy_last_entry(copy);
8893                 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
8894                 entry = vm_map_copy_last_entry(copy);
8895                 vm_map_copy_entry_unlink(copy, entry);
8896                 vm_map_copy_entry_link(tail_copy,
8897                                        vm_map_copy_last_entry(tail_copy),
8898                                        entry);
8899         }
8900
8901         /*
8902          * Copy most (or possibly all) of the data.
8903          */
8904         kr = vm_map_copy_overwrite_nested(dst_map,
8905                                           dst_addr + head_size,
8906                                           copy,
8907                                           interruptible,
8908                                           (pmap_t) NULL,
8909                                           FALSE);
8910         if (kr != KERN_SUCCESS) {
8911                 goto done;
8912         }
8913
8914         if (tail_size) {
8915                 kr = vm_map_copy_overwrite_nested(dst_map,
8916                                                   tail_addr,
8917                                                   tail_copy,
8918                                                   interruptible,
8919                                                   (pmap_t) NULL,
8920                                                   FALSE);
8921         }
8922
8923 done:
8924         assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
8925         if (kr == KERN_SUCCESS) {
8926                 /*
8927                  * Discard all the copy maps.
8928                  */
8929                 if (head_copy) {
8930                         vm_map_copy_discard(head_copy);
8931                         head_copy = NULL;
8932                 }
8933                 vm_map_copy_discard(copy);
8934                 if (tail_copy) {
8935                         vm_map_copy_discard(tail_copy);
8936                         tail_copy = NULL;
8937                 }
8938         } else {
8939                 /*
8940                  * Re-assemble the original copy map.
8941                  */
8942                 if (head_copy) {
8943                         entry = vm_map_copy_first_entry(head_copy);
8944                         vm_map_copy_entry_unlink(head_copy, entry);
8945                         vm_map_copy_entry_link(copy,
8946                                                vm_map_copy_to_entry(copy),
8947                                                entry);
8948                         copy->offset -= head_size;
8949                         copy->size += head_size;
8950                         vm_map_copy_discard(head_copy);
8951                         head_copy = NULL;
8952                 }
8953                 if (tail_copy) {
8954                         entry = vm_map_copy_last_entry(tail_copy);
8955                         vm_map_copy_entry_unlink(tail_copy, entry);
8956                         vm_map_copy_entry_link(copy,
8957                                                vm_map_copy_last_entry(copy),
8958                                                entry);
8959                         copy->size += tail_size;
8960                         vm_map_copy_discard(tail_copy);
8961                         tail_copy = NULL;
8962                 }
8963         }
8964         return kr;
8965 }
8966
8967
8968 /*
8969  *      Routine: vm_map_copy_overwrite_unaligned        [internal use only]
8970  *
8971  *      Description:
8972  *      Physically copy the "copy->size" bytes of "copy" into dst_map at "start", which lies within "entry".
8973  *
8974  *      Implementation:
8975  *      Unaligned parts of pages have to be physically copied.  We use
8976  *      a modified form of vm_fault_copy (which understands non-aligned
8977  *      page offsets and sizes) to do the copy.  We attempt to copy as
8978  *      much memory in one go as possible; however, vm_fault_copy copies
8979  *      within 1 memory object so we have to find the smallest of "amount left",
8980  *      "source object data size" and "target object data size".  With
8981  *      unaligned data we don't need to split regions, therefore the source
8982  *      (copy) object should be one map entry; the target range may be split
8983  *      over multiple map entries however.  In any event we are pessimistic
8984  *      about these assumptions.
8985  *
8986  *      Assumptions:
8987  *      dst_map is write-locked on entry and downgraded to a read lock here.  Every
8988  *      return from inside the copy loop, success included, leaves it unlocked.
8989  */
8990
8991 static kern_return_t
8992 vm_map_copy_overwrite_unaligned(
8993         vm_map_t        dst_map,                /* destination map */
8994         vm_map_entry_t  entry,                  /* dst_map entry that contains "start" */
8995         vm_map_copy_t   copy,                   /* source data, walked one copy entry at a time */
8996         vm_map_offset_t start,                  /* destination address of the next byte to write */
8997         boolean_t       discard_on_success)     /* dispose of each copy entry once it is consumed */
8998 {
8999         vm_map_entry_t          copy_entry;     /* source (copy) entry currently being consumed */
9000         vm_map_entry_t          copy_entry_next;        /* successor, saved before copy_entry may be disposed */
9001         vm_map_version_t        version;        /* map timestamp snapshot taken before unlocking */
9002         vm_object_t             dst_object;     /* object backing "entry" for this pass */
9003         vm_object_offset_t      dst_offset;     /* offset of "start" from entry->vme_start */
9004         vm_object_offset_t      src_offset;     /* offset of the next source byte from copy_entry->vme_start */
9005         vm_object_offset_t      entry_offset;   /* VME_OFFSET(entry), sampled while the map is locked */
9006         vm_map_offset_t         entry_end;      /* entry->vme_end, sampled while the map is locked */
9007         vm_map_size_t           src_size,       /* bytes left in the current copy entry */
9008                                 dst_size,       /* bytes left in the current destination entry */
9009                                 copy_size,      /* bytes to copy in this pass */
9010                                 amount_left;    /* bytes still to be copied overall */
9011         kern_return_t           kr = KERN_SUCCESS;
9012
9013
9014         copy_entry = vm_map_copy_first_entry(copy);
9015
9016         vm_map_lock_write_to_read(dst_map);
9017
9018         src_offset = copy->offset - vm_object_trunc_page(copy->offset);
9019         amount_left = copy->size;
9020 /*
9021  *      Unaligned, so this entry was never clipped: we need the offset into the
9022  *      vm_object, not just the data.  src_offset is relative to copy_entry->vme_start.
9023  */
9024         while (amount_left > 0) {
9025
9026                 if (entry == vm_map_to_entry(dst_map)) {        /* no destination entry left */
9027                         vm_map_unlock_read(dst_map);
9028                         return KERN_INVALID_ADDRESS;
9029                 }
9030
9031                 /* "start" must be within the current map entry */
9032                 assert ((start>=entry->vme_start) && (start<entry->vme_end));
9033
9034                 dst_offset = start - entry->vme_start;
9035
9036                 dst_size = entry->vme_end - start;
9037
9038                 src_size = copy_entry->vme_end -
9039                         (copy_entry->vme_start + src_offset);
9040
9041                 if (dst_size < src_size) {
9042 /*
9043  *                      we can only copy dst_size bytes before
9044  *                      we have to get the next destination entry
9045  */
9046                         copy_size = dst_size;
9047                 } else {
9048 /*
9049  *                      we can only copy src_size bytes before
9050  *                      we have to get the next source copy entry
9051  */
9052                         copy_size = src_size;
9053                 }
9054
9055                 if (copy_size > amount_left) {
9056                         copy_size = amount_left;
9057                 }
9058 /*
9059  *              Entry needs copy: create a shadow object for the
9060  *              copy-on-write region (done under the write lock).
9061  */
9062                 if (entry->needs_copy &&
9063                     ((entry->protection & VM_PROT_WRITE) != 0))
9064                 {
9065                         if (vm_map_lock_read_to_write(dst_map)) {       /* upgrade failed */
9066                                 vm_map_lock_read(dst_map);      /* take the read lock again and redo the lookup */
9067                                 goto RetryLookup;
9068                         }
9069                         VME_OBJECT_SHADOW(entry,
9070                                           (vm_map_size_t)(entry->vme_end
9071                                                           - entry->vme_start));
9072                         entry->needs_copy = FALSE;
9073                         vm_map_lock_write_to_read(dst_map);
9074                 }
9075                 dst_object = VME_OBJECT(entry);
9076 /*
9077  *              Unlike with the virtual (aligned) copy, we're going
9078  *              to fault on it, therefore we need a target object.
9079  */
9080                 if (dst_object == VM_OBJECT_NULL) {
9081                         if (vm_map_lock_read_to_write(dst_map)) {       /* upgrade failed */
9082                                 vm_map_lock_read(dst_map);      /* take the read lock again and redo the lookup */
9083                                 goto RetryLookup;
9084                         }
9085                         dst_object = vm_object_allocate((vm_map_size_t)
9086                                                         entry->vme_end - entry->vme_start);
9087                         VME_OBJECT(entry) = dst_object;
9088                         VME_OFFSET_SET(entry, 0);
9089                         assert(entry->use_pmap);
9090                         vm_map_lock_write_to_read(dst_map);
9091                 }
9092 /*
9093  *              Take an object reference and unlock map. The "entry" may
9094  *              disappear or change when the map is unlocked.
9095  */
9096                 vm_object_reference(dst_object);
9097                 version.main_timestamp = dst_map->timestamp;    /* compared below to detect map changes */
9098                 entry_offset = VME_OFFSET(entry);       /* sampled now: "entry" is not touched while unlocked */
9099                 entry_end = entry->vme_end;
9100                 vm_map_unlock_read(dst_map);
9101 /*
9102  *              Copy as much as possible in one pass
9103  */
9104                 kr = vm_fault_copy(
9105                         VME_OBJECT(copy_entry),
9106                         VME_OFFSET(copy_entry) + src_offset,
9107                         &copy_size,
9108                         dst_object,
9109                         entry_offset + dst_offset,
9110                         dst_map,
9111                         &version,
9112                         THREAD_UNINT );
9113
9114                 start += copy_size;     /* copy_size was passed by address; presumably now the amount actually copied -- confirm */
9115                 src_offset += copy_size;
9116                 amount_left -= copy_size;
9117 /*
9118  *              Release the object reference
9119  */
9120                 vm_object_deallocate(dst_object);
9121 /*
9122  *              If a hard error occurred, return it now (dst_map is not locked here)
9123  */
9124                 if (kr != KERN_SUCCESS)
9125                         return kr;
9126
9127                 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
9128                     || amount_left == 0)
9129                 {
9130 /*
9131  *                      all done with this copy entry; dispose of it if the caller asked.
9132  */
9133                         copy_entry_next = copy_entry->vme_next;
9134
9135                         if (discard_on_success) {
9136                                 vm_map_copy_entry_unlink(copy, copy_entry);
9137                                 assert(!copy_entry->is_sub_map);
9138                                 vm_object_deallocate(VME_OBJECT(copy_entry));
9139                                 vm_map_copy_entry_dispose(copy, copy_entry);
9140                         }
9141
9142                         if (copy_entry_next == vm_map_copy_to_entry(copy) &&
9143                             amount_left) {
9144 /*
9145  *                              not finished copying but run out of source
9146  */
9147                                 return KERN_INVALID_ADDRESS;
9148                         }
9149
9150                         copy_entry = copy_entry_next;
9151
9152                         src_offset = 0;
9153                 }
9154
9155                 if (amount_left == 0)
9156                         return KERN_SUCCESS;    /* dst_map is not re-locked on this path */
9157
9158                 vm_map_lock_read(dst_map);
9159                 if (version.main_timestamp == dst_map->timestamp) {
9160                         if (start == entry_end) {
9161 /*
9162  *                              destination region is split.  Use the version
9163  *                              information to avoid a lookup in the normal
9164  *                              case.
9165  */
9166                                 entry = entry->vme_next;
9167 /*
9168  *                              should be contiguous. Fail if we encounter
9169  *                              a hole in the destination.
9170  */
9171                                 if (start != entry->vme_start) {
9172                                         vm_map_unlock_read(dst_map);
9173                                         return KERN_INVALID_ADDRESS ;
9174                                 }
9175                         }
9176                 } else {
9177 /*
9178  *                      Map version check failed.
9179  *                      we must lookup the entry because somebody
9180  *                      might have changed the map behind our backs.
9181  */
9182                 RetryLookup:    /* also reached by goto when a lock upgrade above fails */
9183                         if (!vm_map_lookup_entry(dst_map, start, &entry))
9184                         {
9185                                 vm_map_unlock_read(dst_map);
9186                                 return KERN_INVALID_ADDRESS ;
9187                         }
9188                 }
9189         }/* while */
9190
9191         return KERN_SUCCESS;    /* only reached when copy->size was 0: the read lock is still held here */
9192 }/* vm_map_copy_overwrite_unaligned */
9193
9194 /*
9195  *      Routine: vm_map_copy_overwrite_aligned  [internal use only]
9196  *
9197  *      Description:
9198  *      Does all the vm_trickery possible for whole pages.
9199  *
9200  *      Implementation:
9201  *
9202  *      If there are no permanent objects in the destination,
9203  *      and the source and destination map entry zones match,
9204  *      and the destination map entry is not shared,
9205  *      then the map entries can be deleted and replaced
9206  *      with those from the copy.  The following code is the
9207  *      basic idea of what to do, but there are lots of annoying
9208  *      little details about getting protection and inheritance
9209  *      right.  Should add protection, inheritance, and sharing checks
9210  *      to the above pass and make sure that no wiring is involved.
9211  */
9212
9213 int vm_map_copy_overwrite_aligned_src_not_internal = 0;    /* slow_copy taken: source over a malloc-aliased entry is backed by a non-internal object */
9214 int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;   /* slow_copy taken: true_share or non-SYMMETRIC copy strategy found in the source's shadow chain */
9215 int vm_map_copy_overwrite_aligned_src_large = 0;   /* slow_copy taken: small copy (<= __TRADEOFF1_COPY_SIZE) out of a large source object (>= __TRADEOFF1_OBJ_SIZE) */
9216
9217 static kern_return_t
9218 vm_map_copy_overwrite_aligned(
9219         vm_map_t        dst_map,
9220         vm_map_entry_t  tmp_entry,
9221         vm_map_copy_t   copy,
9222         vm_map_offset_t start,
9223         __unused pmap_t pmap)
9224 {
9225         vm_object_t     object;
9226         vm_map_entry_t  copy_entry;
9227         vm_map_size_t   copy_size;
9228         vm_map_size_t   size;
9229         vm_map_entry_t  entry;
9230
9231         while ((copy_entry = vm_map_copy_first_entry(copy))
9232                != vm_map_copy_to_entry(copy))
9233         {
9234                 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
9235
9236                 entry = tmp_entry;
9237                 if (entry->is_sub_map) {
9238                         /* unnested when clipped earlier */
9239                         assert(!entry->use_pmap);
9240                 }
9241                 if (entry == vm_map_to_entry(dst_map)) {
9242                         vm_map_unlock(dst_map);
9243                         return KERN_INVALID_ADDRESS;
9244                 }
9245                 size = (entry->vme_end - entry->vme_start);
9246                 /*
9247                  *      Make sure that no holes popped up in the
9248                  *      address map, and that the protection is
9249                  *      still valid, in case the map was unlocked
9250                  *      earlier.
9251                  */
9252
9253                 if ((entry->vme_start != start) || ((entry->is_sub_map)
9254                                                     && !entry->needs_copy)) {
9255                         vm_map_unlock(dst_map);
9256                         return(KERN_INVALID_ADDRESS);
9257                 }
9258                 assert(entry != vm_map_to_entry(dst_map));
9259
9260                 /*
9261                  *      Check protection again
9262                  */
9263
9264                 if ( ! (entry->protection & VM_PROT_WRITE)) {
9265                         vm_map_unlock(dst_map);
9266                         return(KERN_PROTECTION_FAILURE);
9267                 }
9268
9269                 /*
9270                  *      Adjust to source size first
9271                  */
9272
9273                 if (copy_size < size) {
9274                         if (entry->map_aligned &&
9275                             !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
9276                                                  VM_MAP_PAGE_MASK(dst_map))) {
9277                                 /* no longer map-aligned */
9278                                 entry->map_aligned = FALSE;
9279                         }
9280                         vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
9281                         size = copy_size;
9282                 }
9283
9284                 /*
9285                  *      Adjust to destination size
9286                  */
9287
9288                 if (size < copy_size) {
9289                         vm_map_copy_clip_end(copy, copy_entry,
9290                                              copy_entry->vme_start + size);
9291                         copy_size = size;
9292                 }
9293
9294                 assert((entry->vme_end - entry->vme_start) == size);
9295                 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
9296                 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
9297
9298                 /*
9299                  *      If the destination contains temporary unshared memory,
9300                  *      we can perform the copy by throwing it away and
9301                  *      installing the source data.
9302                  */
9303
9304                 object = VME_OBJECT(entry);
9305                 if ((!entry->is_shared &&
9306                      ((object == VM_OBJECT_NULL) ||
9307                       (object->internal && !object->true_share))) ||
9308                     entry->needs_copy) {
9309                         vm_object_t     old_object = VME_OBJECT(entry);
9310                         vm_object_offset_t      old_offset = VME_OFFSET(entry);
9311                         vm_object_offset_t      offset;
9312
9313                         /*
9314                          * Ensure that the source and destination aren't
9315                          * identical
9316                          */
9317                         if (old_object == VME_OBJECT(copy_entry) &&
9318                             old_offset == VME_OFFSET(copy_entry)) {
9319                                 vm_map_copy_entry_unlink(copy, copy_entry);
9320                                 vm_map_copy_entry_dispose(copy, copy_entry);
9321
9322                                 if (old_object != VM_OBJECT_NULL)
9323                                         vm_object_deallocate(old_object);
9324
9325                                 start = tmp_entry->vme_end;
9326                                 tmp_entry = tmp_entry->vme_next;
9327                                 continue;
9328                         }
9329
9330 #if !CONFIG_EMBEDDED
9331 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
9332 #define __TRADEOFF1_COPY_SIZE (128 * 1024)      /* 128 KB */
9333                         if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
9334                             VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
9335                             copy_size <= __TRADEOFF1_COPY_SIZE) {
9336                                 /*
9337                                  * Virtual vs. Physical copy tradeoff #1.
9338                                  *
9339                                  * Copying only a few pages out of a large
9340                                  * object:  do a physical copy instead of
9341                                  * a virtual copy, to avoid possibly keeping
9342                                  * the entire large object alive because of
9343                                  * those few copy-on-write pages.
9344                                  */
9345                                 vm_map_copy_overwrite_aligned_src_large++;
9346                                 goto slow_copy;
9347                         }
9348 #endif /* !CONFIG_EMBEDDED */
9349
9350                         if ((dst_map->pmap != kernel_pmap) &&
9351                             (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
9352                             (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_LARGE_REUSED)) {
9353                                 vm_object_t new_object, new_shadow;
9354
9355                                 /*
9356                                  * We're about to map something over a mapping
9357                                  * established by malloc()...
9358                                  */
9359                                 new_object = VME_OBJECT(copy_entry);
9360                                 if (new_object != VM_OBJECT_NULL) {
9361                                         vm_object_lock_shared(new_object);
9362                                 }
9363                                 while (new_object != VM_OBJECT_NULL &&
9364 #if !CONFIG_EMBEDDED
9365                                        !new_object->true_share &&
9366                                        new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
9367 #endif /* !CONFIG_EMBEDDED */
9368                                        new_object->internal) {
9369                                         new_shadow = new_object->shadow;
9370                                         if (new_shadow == VM_OBJECT_NULL) {
9371                                                 break;
9372                                         }
9373                                         vm_object_lock_shared(new_shadow);
9374                                         vm_object_unlock(new_object);
9375                                         new_object = new_shadow;
9376                                 }
9377                                 if (new_object != VM_OBJECT_NULL) {
9378                                         if (!new_object->internal) {
9379                                                 /*
9380                                                  * The new mapping is backed
9381                                                  * by an external object.  We
9382                                                  * don't want malloc'ed memory
9383                                                  * to be replaced with such a
9384                                                  * non-anonymous mapping, so
9385                                                  * let's go off the optimized
9386                                                  * path...
9387                                                  */
9388                                                 vm_map_copy_overwrite_aligned_src_not_internal++;
9389                                                 vm_object_unlock(new_object);
9390                                                 goto slow_copy;
9391                                         }
9392 #if !CONFIG_EMBEDDED
9393                                         if (new_object->true_share ||
9394                                             new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
9395                                                 /*
9396                                                  * Same if there's a "true_share"
9397                                                  * object in the shadow chain, or
9398                                                  * an object with a non-default
9399                                                  * (SYMMETRIC) copy strategy.
9400                                                  */
9401                                                 vm_map_copy_overwrite_aligned_src_not_symmetric++;
9402                                                 vm_object_unlock(new_object);
9403                                                 goto slow_copy;
9404                                         }
9405 #endif /* !CONFIG_EMBEDDED */
9406                                         vm_object_unlock(new_object);
9407                                 }
9408                                 /*
9409                                  * The new mapping is still backed by
9410                                  * anonymous (internal) memory, so it's
9411                                  * OK to substitute it for the original
9412                                  * malloc() mapping.
9413                                  */
9414                         }
9415
9416                         if (old_object != VM_OBJECT_NULL) {
9417                                 if(entry->is_sub_map) {
9418                                         if(entry->use_pmap) {
9419 #ifndef NO_NESTED_PMAP
9420                                                 pmap_unnest(dst_map->pmap,
9421                                                             (addr64_t)entry->vme_start,
9422                                                             entry->vme_end - entry->vme_start);
9423 #endif  /* NO_NESTED_PMAP */
9424                                                 if(dst_map->mapped_in_other_pmaps) {
9425                                                         /* clean up parent */
9426                                                         /* map/maps */
9427                                                         vm_map_submap_pmap_clean(
9428                                                                 dst_map, entry->vme_start,
9429                                                                 entry->vme_end,
9430                                                                 VME_SUBMAP(entry),
9431                                                                 VME_OFFSET(entry));
9432                                                 }
9433                                         } else {
9434                                                 vm_map_submap_pmap_clean(
9435                                                         dst_map, entry->vme_start,
9436                                                         entry->vme_end,
9437                                                         VME_SUBMAP(entry),
9438                                                         VME_OFFSET(entry));
9439                                         }
9440                                         vm_map_deallocate(VME_SUBMAP(entry));
9441                                 } else {
9442                                         if(dst_map->mapped_in_other_pmaps) {
9443                                                 vm_object_pmap_protect_options(
9444                                                         VME_OBJECT(entry),
9445                                                         VME_OFFSET(entry),
9446                                                         entry->vme_end
9447                                                         - entry->vme_start,
9448                                                         PMAP_NULL,
9449                                                         entry->vme_start,
9450                                                         VM_PROT_NONE,
9451                                                         PMAP_OPTIONS_REMOVE);
9452                                         } else {
9453                                                 pmap_remove_options(
9454                                                         dst_map->pmap,
9455                                                         (addr64_t)(entry->vme_start),
9456                                                         (addr64_t)(entry->vme_end),
9457                                                         PMAP_OPTIONS_REMOVE);
9458                                         }
9459                                         vm_object_deallocate(old_object);
9460                                 }
9461                         }
9462
9463                         entry->is_sub_map = FALSE;
9464                         VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
9465                         object = VME_OBJECT(entry);
9466                         entry->needs_copy = copy_entry->needs_copy;
9467                         entry->wired_count = 0;
9468                         entry->user_wired_count = 0;
9469                         offset = VME_OFFSET(copy_entry);
9470                         VME_OFFSET_SET(entry, offset);
9471
9472                         vm_map_copy_entry_unlink(copy, copy_entry);
9473                         vm_map_copy_entry_dispose(copy, copy_entry);
9474
9475                         /*
9476                          * we could try to push pages into the pmap at this point, BUT
9477                          * this optimization only saved on average 2 us per page if ALL
9478                          * the pages in the source were currently mapped
9479                          * and ALL the pages in the dest were touched, if there were fewer
9480                          * than 2/3 of the pages touched, this optimization actually cost more cycles
9481                          * it also puts a lot of pressure on the pmap layer w/r to mapping structures
9482                          */
9483
9484                         /*
9485                          *      Set up for the next iteration.  The map
9486                          *      has not been unlocked, so the next
9487                          *      address should be at the end of this
9488                          *      entry, and the next map entry should be
9489                          *      the one following it.
9490                          */
9491
9492                         start = tmp_entry->vme_end;
9493                         tmp_entry = tmp_entry->vme_next;
9494                 } else {
9495                         vm_map_version_t        version;
9496                         vm_object_t             dst_object;
9497                         vm_object_offset_t      dst_offset;
9498                         kern_return_t           r;
9499
9500                 slow_copy:
9501                         if (entry->needs_copy) {
9502                                 VME_OBJECT_SHADOW(entry,
9503                                                   (entry->vme_end -
9504                                                    entry->vme_start));
9505                                 entry->needs_copy = FALSE;
9506                         }
9507
9508                         dst_object = VME_OBJECT(entry);
9509                         dst_offset = VME_OFFSET(entry);
9510
9511                         /*
9512                          *      Take an object reference, and record
9513                          *      the map version information so that the
9514                          *      map can be safely unlocked.
9515                          */
9516
9517                         if (dst_object == VM_OBJECT_NULL) {
9518                                 /*
9519                                  * We would usually have just taken the
9520                                  * optimized path above if the destination
9521                                  * object has not been allocated yet.  But we
9522                                  * now disable that optimization if the copy
9523                                  * entry's object is not backed by anonymous
9524                                  * memory to avoid replacing malloc'ed
9525                                  * (i.e. re-usable) anonymous memory with a
9526                                  * not-so-anonymous mapping.
9527                                  * So we have to handle this case here and
9528                                  * allocate a new VM object for this map entry.
9529                                  */
9530                                 dst_object = vm_object_allocate(
9531                                         entry->vme_end - entry->vme_start);
9532                                 dst_offset = 0;
9533                                 VME_OBJECT_SET(entry, dst_object);
9534                                 VME_OFFSET_SET(entry, dst_offset);
9535                                 assert(entry->use_pmap);
9536
9537                         }
9538
9539                         vm_object_reference(dst_object);
9540
9541                         /* account for unlock bumping up timestamp */
9542                         version.main_timestamp = dst_map->timestamp + 1;
9543
9544                         vm_map_unlock(dst_map);
9545
9546                         /*
9547                          *      Copy as much as possible in one pass
9548                          */
9549
9550                         copy_size = size;
9551                         r = vm_fault_copy(
9552                                 VME_OBJECT(copy_entry),
9553                                 VME_OFFSET(copy_entry),
9554                                 &copy_size,
9555                                 dst_object,
9556                                 dst_offset,
9557                                 dst_map,
9558                                 &version,
9559                                 THREAD_UNINT );
9560
9561                         /*
9562                          *      Release the object reference
9563                          */
9564
9565                         vm_object_deallocate(dst_object);
9566
9567                         /*
9568                          *      If a hard error occurred, return it now
9569                          */
9570
9571                         if (r != KERN_SUCCESS)
9572                                 return(r);
9573
9574                         if (copy_size != 0) {
9575                                 /*
9576                                  *      Dispose of the copied region
9577                                  */
9578
9579                                 vm_map_copy_clip_end(copy, copy_entry,
9580                                                      copy_entry->vme_start + copy_size);
9581                                 vm_map_copy_entry_unlink(copy, copy_entry);
9582                                 vm_object_deallocate(VME_OBJECT(copy_entry));
9583                                 vm_map_copy_entry_dispose(copy, copy_entry);
9584                         }
9585
9586                         /*
9587                          *      Pick up in the destination map where we left off.
9588                          *
9589                          *      Use the version information to avoid a lookup
9590                          *      in the normal case.
9591                          */
9592
9593                         start += copy_size;
9594                         vm_map_lock(dst_map);
9595                         if (version.main_timestamp == dst_map->timestamp &&
9596                             copy_size != 0) {
9597                                 /* We can safely use saved tmp_entry value */
9598
9599                                 if (tmp_entry->map_aligned &&
9600                                     !VM_MAP_PAGE_ALIGNED(
9601                                             start,
9602                                             VM_MAP_PAGE_MASK(dst_map))) {
9603                                         /* no longer map-aligned */
9604                                         tmp_entry->map_aligned = FALSE;
9605                                 }
9606                                 vm_map_clip_end(dst_map, tmp_entry, start);
9607                                 tmp_entry = tmp_entry->vme_next;
9608                         } else {
9609                                 /* Must do lookup of tmp_entry */
9610
9611                                 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
9612                                         vm_map_unlock(dst_map);
9613                                         return(KERN_INVALID_ADDRESS);
9614                                 }
9615                                 if (tmp_entry->map_aligned &&
9616                                     !VM_MAP_PAGE_ALIGNED(
9617                                             start,
9618                                             VM_MAP_PAGE_MASK(dst_map))) {
9619                                         /* no longer map-aligned */
9620                                         tmp_entry->map_aligned = FALSE;
9621                                 }
9622                                 vm_map_clip_start(dst_map, tmp_entry, start);
9623                         }
9624                 }
9625         }/* while */
9626
9627         return(KERN_SUCCESS);
9628 }/* vm_map_copy_overwrite_aligned */
9629
9630 /*
9631  *      Routine: vm_map_copyin_kernel_buffer [internal use only]
9632  *
9633  *      Description:
9634  *              Copy in data to a kernel buffer from space in the
9635  *              source map. The original space may be optionally
9636  *              deallocated.
9637  *
9638  *              If successful, returns a new copy object.
9639  */
9640 static kern_return_t
9641 vm_map_copyin_kernel_buffer(
9642         vm_map_t        src_map,
9643         vm_map_offset_t src_addr,
9644         vm_map_size_t   len,
9645         boolean_t       src_destroy,
9646         vm_map_copy_t   *copy_result)
9647 {
9648         kern_return_t kr;
9649         vm_map_copy_t copy;
9650         vm_size_t kalloc_size;
9651
9652         if (len > msg_ool_size_small)
9653                 return KERN_INVALID_ARGUMENT;
9654
9655         kalloc_size = (vm_size_t)(cpy_kdata_hdr_sz + len);
9656
9657         copy = (vm_map_copy_t)kalloc(kalloc_size);
9658         if (copy == VM_MAP_COPY_NULL)
9659                 return KERN_RESOURCE_SHORTAGE;
9660         copy->type = VM_MAP_COPY_KERNEL_BUFFER;
9661         copy->size = len;
9662         copy->offset = 0;
9663
9664         kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
9665         if (kr != KERN_SUCCESS) {
9666                 kfree(copy, kalloc_size);
9667                 return kr;
9668         }
9669         if (src_destroy) {
9670                 (void) vm_map_remove(
9671                         src_map,
9672                         vm_map_trunc_page(src_addr,
9673                                           VM_MAP_PAGE_MASK(src_map)),
9674                         vm_map_round_page(src_addr + len,
9675                                           VM_MAP_PAGE_MASK(src_map)),
9676                         (VM_MAP_REMOVE_INTERRUPTIBLE |
9677                          VM_MAP_REMOVE_WAIT_FOR_KWIRE |
9678                          ((src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : 0)));
9679         }
9680         *copy_result = copy;
9681         return KERN_SUCCESS;
9682 }
9683
9684 /*
9685  *      Routine: vm_map_copyout_kernel_buffer   [internal use only]
9686  *
9687  *      Description:
9688  *              Copy out data from a kernel buffer into space in the
9689  *              destination map. The space may be otpionally dynamically
9690  *              allocated.
9691  *
9692  *              If successful, consumes the copy object.
9693  *              Otherwise, the caller is responsible for it.
9694  */
9695 static int vm_map_copyout_kernel_buffer_failures = 0;
9696 static kern_return_t
9697 vm_map_copyout_kernel_buffer(
9698         vm_map_t                map,
9699         vm_map_address_t        *addr,  /* IN/OUT */
9700         vm_map_copy_t           copy,
9701         vm_map_size_t           copy_size,
9702         boolean_t               overwrite,
9703         boolean_t               consume_on_success)
9704 {
9705         kern_return_t kr = KERN_SUCCESS;
9706         thread_t thread = current_thread();
9707
9708         assert(copy->size == copy_size);
9709
9710         /*
9711          * check for corrupted vm_map_copy structure
9712          */
9713         if (copy_size > msg_ool_size_small || copy->offset)
9714                 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
9715                       (long long)copy->size, (long long)copy->offset);
9716
9717         if (!overwrite) {
9718
9719                 /*
9720                  * Allocate space in the target map for the data
9721                  */
9722                 *addr = 0;
9723                 kr = vm_map_enter(map,
9724                                   addr,
9725                                   vm_map_round_page(copy_size,
9726                                                     VM_MAP_PAGE_MASK(map)),
9727                                   (vm_map_offset_t) 0,
9728                                   VM_FLAGS_ANYWHERE,
9729                                   VM_MAP_KERNEL_FLAGS_NONE,
9730                                   VM_KERN_MEMORY_NONE,
9731                                   VM_OBJECT_NULL,
9732                                   (vm_object_offset_t) 0,
9733                                   FALSE,
9734                                   VM_PROT_DEFAULT,
9735                                   VM_PROT_ALL,
9736                                   VM_INHERIT_DEFAULT);
9737                 if (kr != KERN_SUCCESS)
9738                         return kr;
9739 #if KASAN
9740                 if (map->pmap == kernel_pmap) {
9741                         kasan_notify_address(*addr, copy->size);
9742                 }
9743 #endif
9744         }
9745
9746         /*
9747          * Copyout the data from the kernel buffer to the target map.
9748          */
9749         if (thread->map == map) {
9750
9751                 /*
9752                  * If the target map is the current map, just do
9753                  * the copy.
9754                  */
9755                 assert((vm_size_t)copy_size == copy_size);
9756                 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
9757                         kr = KERN_INVALID_ADDRESS;
9758                 }
9759         }
9760         else {
9761                 vm_map_t oldmap;
9762
9763                 /*
9764                  * If the target map is another map, assume the
9765                  * target's address space identity for the duration
9766                  * of the copy.
9767                  */
9768                 vm_map_reference(map);
9769                 oldmap = vm_map_switch(map);
9770
9771                 assert((vm_size_t)copy_size == copy_size);
9772                 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
9773                         vm_map_copyout_kernel_buffer_failures++;
9774                         kr = KERN_INVALID_ADDRESS;
9775                 }
9776
9777                 (void) vm_map_switch(oldmap);
9778                 vm_map_deallocate(map);
9779         }
9780
9781         if (kr != KERN_SUCCESS) {
9782                 /* the copy failed, clean up */
9783                 if (!overwrite) {
9784                         /*
9785                          * Deallocate the space we allocated in the target map.
9786                          */
9787                         (void) vm_map_remove(
9788                                 map,
9789                                 vm_map_trunc_page(*addr,
9790                                                   VM_MAP_PAGE_MASK(map)),
9791                                 vm_map_round_page((*addr +
9792                                                    vm_map_round_page(copy_size,
9793                                                                      VM_MAP_PAGE_MASK(map))),
9794                                                   VM_MAP_PAGE_MASK(map)),
9795                                 VM_MAP_NO_FLAGS);
9796                         *addr = 0;
9797                 }
9798         } else {
9799                 /* copy was successful, dicard the copy structure */
9800                 if (consume_on_success) {
9801                         kfree(copy, copy_size + cpy_kdata_hdr_sz);
9802                 }
9803         }
9804
9805         return kr;
9806 }
9807
/*
 *      Macro:          vm_map_copy_insert
 *
 *      Description:
 *              Hand the copy chain "copy" to
 *              vm_map_store_copy_insert() so that it is linked into
 *              "map" after the entry "where".
 *      Side effects:
 *              The copy header is released to vm_map_copy_zone, so
 *              "copy" must not be used once the macro has run.
 *      Warning:
 *              "copy" appears twice in the expansion and is therefore
 *              evaluated more than once; pass only expressions that
 *              have no side effects.
 */
#define vm_map_copy_insert(map, where, copy)                            \
MACRO_BEGIN                                                             \
        vm_map_store_copy_insert((map), (where), (copy));               \
        zfree(vm_map_copy_zone, (copy));                                \
MACRO_END
9824
9825 void
9826 vm_map_copy_remap(
9827         vm_map_t        map,
9828         vm_map_entry_t  where,
9829         vm_map_copy_t   copy,
9830         vm_map_offset_t adjustment,
9831         vm_prot_t       cur_prot,
9832         vm_prot_t       max_prot,
9833         vm_inherit_t    inheritance)
9834 {
9835         vm_map_entry_t  copy_entry, new_entry;
9836
9837         for (copy_entry = vm_map_copy_first_entry(copy);
9838              copy_entry != vm_map_copy_to_entry(copy);
9839              copy_entry = copy_entry->vme_next) {
9840                 /* get a new VM map entry for the map */
9841                 new_entry = vm_map_entry_create(map,
9842                                                 !map->hdr.entries_pageable);
9843                 /* copy the "copy entry" to the new entry */
9844                 vm_map_entry_copy(new_entry, copy_entry);
9845                 /* adjust "start" and "end" */
9846                 new_entry->vme_start += adjustment;
9847                 new_entry->vme_end += adjustment;
9848                 /* clear some attributes */
9849                 new_entry->inheritance = inheritance;
9850                 new_entry->protection = cur_prot;
9851                 new_entry->max_protection = max_prot;
9852                 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
9853                 /* take an extra reference on the entry's "object" */
9854                 if (new_entry->is_sub_map) {
9855                         assert(!new_entry->use_pmap); /* not nested */
9856                         vm_map_lock(VME_SUBMAP(new_entry));
9857                         vm_map_reference(VME_SUBMAP(new_entry));
9858                         vm_map_unlock(VME_SUBMAP(new_entry));
9859                 } else {
9860                         vm_object_reference(VME_OBJECT(new_entry));
9861                 }
9862                 /* insert the new entry in the map */
9863                 vm_map_store_entry_link(map, where, new_entry);
9864                 /* continue inserting the "copy entries" after the new entry */
9865                 where = new_entry;
9866         }
9867 }
9868
9869
9870 /*
9871  * Returns true if *size matches (or is in the range of) copy->size.
9872  * Upon returning true, the *size field is updated with the actual size of the
9873  * copy object (may be different for VM_MAP_COPY_ENTRY_LIST types)
9874  */
9875 boolean_t
9876 vm_map_copy_validate_size(
9877         vm_map_t                dst_map,
9878         vm_map_copy_t           copy,
9879         vm_map_size_t           *size)
9880 {
9881         if (copy == VM_MAP_COPY_NULL)
9882                 return FALSE;
9883         vm_map_size_t copy_sz = copy->size;
9884         vm_map_size_t sz = *size;
9885         switch (copy->type) {
9886         case VM_MAP_COPY_OBJECT:
9887         case VM_MAP_COPY_KERNEL_BUFFER:
9888                 if (sz == copy_sz)
9889                         return TRUE;
9890                 break;
9891         case VM_MAP_COPY_ENTRY_LIST:
9892                 /*
9893                  * potential page-size rounding prevents us from exactly
9894                  * validating this flavor of vm_map_copy, but we can at least
9895                  * assert that it's within a range.
9896                  */
9897                 if (copy_sz >= sz &&
9898                     copy_sz <= vm_map_round_page(sz, VM_MAP_PAGE_MASK(dst_map))) {
9899                         *size = copy_sz;
9900                         return TRUE;
9901                 }
9902                 break;
9903         default:
9904                 break;
9905         }
9906         return FALSE;
9907 }
9908
9909 /*
9910  *      Routine:        vm_map_copyout_size
9911  *
9912  *      Description:
9913  *              Copy out a copy chain ("copy") into newly-allocated
9914  *              space in the destination map. Uses a prevalidated
9915  *              size for the copy object (vm_map_copy_validate_size).
9916  *
9917  *              If successful, consumes the copy object.
9918  *              Otherwise, the caller is responsible for it.
9919  */
9920 kern_return_t
9921 vm_map_copyout_size(
9922         vm_map_t                dst_map,
9923         vm_map_address_t        *dst_addr,      /* OUT */
9924         vm_map_copy_t           copy,
9925         vm_map_size_t           copy_size)
9926 {
9927         return vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
9928                                        TRUE, /* consume_on_success */
9929                                        VM_PROT_DEFAULT,
9930                                        VM_PROT_ALL,
9931                                        VM_INHERIT_DEFAULT);
9932 }
9933
9934 /*
9935  *      Routine:        vm_map_copyout
9936  *
9937  *      Description:
9938  *              Copy out a copy chain ("copy") into newly-allocated
9939  *              space in the destination map.
9940  *
9941  *              If successful, consumes the copy object.
9942  *              Otherwise, the caller is responsible for it.
9943  */
9944 kern_return_t
9945 vm_map_copyout(
9946         vm_map_t                dst_map,
9947         vm_map_address_t        *dst_addr,      /* OUT */
9948         vm_map_copy_t           copy)
9949 {
9950         return vm_map_copyout_internal(dst_map, dst_addr, copy, copy ? copy->size : 0,
9951                                        TRUE, /* consume_on_success */
9952                                        VM_PROT_DEFAULT,
9953                                        VM_PROT_ALL,
9954                                        VM_INHERIT_DEFAULT);
9955 }
9956
9957 kern_return_t
9958 vm_map_copyout_internal(
9959         vm_map_t                dst_map,
9960         vm_map_address_t        *dst_addr,      /* OUT */
9961         vm_map_copy_t           copy,
9962         vm_map_size_t           copy_size,
9963         boolean_t               consume_on_success,
9964         vm_prot_t               cur_protection,
9965         vm_prot_t               max_protection,
9966         vm_inherit_t            inheritance)
9967 {
9968         vm_map_size_t           size;
9969         vm_map_size_t           adjustment;
9970         vm_map_offset_t         start;
9971         vm_object_offset_t      vm_copy_start;
9972         vm_map_entry_t          last;
9973         vm_map_entry_t          entry;
9974         vm_map_entry_t          hole_entry;
9975
9976         /*
9977          *      Check for null copy object.
9978          */
9979
9980         if (copy == VM_MAP_COPY_NULL) {
9981                 *dst_addr = 0;
9982                 return(KERN_SUCCESS);
9983         }
9984
9985         if (copy->size != copy_size) {
9986                 *dst_addr = 0;
9987                 return KERN_FAILURE;
9988         }
9989
9990         /*
9991          *      Check for special copy object, created
9992          *      by vm_map_copyin_object.
9993          */
9994
9995         if (copy->type == VM_MAP_COPY_OBJECT) {
9996                 vm_object_t             object = copy->cpy_object;
9997                 kern_return_t           kr;
9998                 vm_object_offset_t      offset;
9999
10000                 offset = vm_object_trunc_page(copy->offset);
10001                 size = vm_map_round_page((copy_size +
10002                                           (vm_map_size_t)(copy->offset -
10003                                                           offset)),
10004                                          VM_MAP_PAGE_MASK(dst_map));
10005                 *dst_addr = 0;
10006                 kr = vm_map_enter(dst_map, dst_addr, size,
10007                                   (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
10008                                   VM_MAP_KERNEL_FLAGS_NONE,
10009                                   VM_KERN_MEMORY_NONE,
10010                                   object, offset, FALSE,
10011                                   VM_PROT_DEFAULT, VM_PROT_ALL,
10012                                   VM_INHERIT_DEFAULT);
10013                 if (kr != KERN_SUCCESS)
10014                         return(kr);
10015                 /* Account for non-pagealigned copy object */
10016                 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
10017                 if (consume_on_success)
10018                         zfree(vm_map_copy_zone, copy);
10019                 return(KERN_SUCCESS);
10020         }
10021
10022         /*
10023          *      Check for special kernel buffer allocated
10024          *      by new_ipc_kmsg_copyin.
10025          */
10026
10027         if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
10028                 return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
10029                                                     copy, copy_size, FALSE,
10030                                                     consume_on_success);
10031         }
10032
10033
10034         /*
10035          *      Find space for the data
10036          */
10037
10038         vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
10039                                           VM_MAP_COPY_PAGE_MASK(copy));
10040         size = vm_map_round_page((vm_map_size_t)copy->offset + copy_size,
10041                                  VM_MAP_COPY_PAGE_MASK(copy))
10042                 - vm_copy_start;
10043
10044
10045 StartAgain: ;
10046
10047         vm_map_lock(dst_map);
10048         if( dst_map->disable_vmentry_reuse == TRUE) {
10049                 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
10050                 last = entry;
10051         } else {
10052                 if (dst_map->holelistenabled) {
10053                         hole_entry = (vm_map_entry_t)dst_map->holes_list;
10054
10055                         if (hole_entry == NULL) {
10056                                 /*
10057                                  * No more space in the map?
10058                                  */
10059                                 vm_map_unlock(dst_map);
10060                                 return(KERN_NO_SPACE);
10061                         }
10062
10063                         last = hole_entry;
10064                         start = last->vme_start;
10065                 } else {
10066                         assert(first_free_is_valid(dst_map));
10067                         start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
10068                         vm_map_min(dst_map) : last->vme_end;
10069                 }
10070                 start = vm_map_round_page(start,
10071                                           VM_MAP_PAGE_MASK(dst_map));
10072         }
10073
10074         while (TRUE) {
10075                 vm_map_entry_t  next = last->vme_next;
10076                 vm_map_offset_t end = start + size;
10077
10078                 if ((end > dst_map->max_offset) || (end < start)) {
10079                         if (dst_map->wait_for_space) {
10080                                 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
10081                                         assert_wait((event_t) dst_map,
10082                                                     THREAD_INTERRUPTIBLE);
10083                                         vm_map_unlock(dst_map);
10084                                         thread_block(THREAD_CONTINUE_NULL);
10085                                         goto StartAgain;
10086                                 }
10087                         }
10088                         vm_map_unlock(dst_map);
10089                         return(KERN_NO_SPACE);
10090                 }
10091
10092                 if (dst_map->holelistenabled) {
10093                         if (last->vme_end >= end)
10094                                 break;
10095                 } else {
10096                         /*
10097                          *      If there are no more entries, we must win.
10098                          *
10099                          *      OR
10100                          *
10101                          *      If there is another entry, it must be
10102                          *      after the end of the potential new region.
10103                          */
10104
10105                         if (next == vm_map_to_entry(dst_map))
10106                                 break;
10107
10108                         if (next->vme_start >= end)
10109                                 break;
10110                 }
10111
10112                 last = next;
10113
10114                 if (dst_map->holelistenabled) {
10115                         if (last == (vm_map_entry_t) dst_map->holes_list) {
10116                                 /*
10117                                  * Wrapped around
10118                                  */
10119                                 vm_map_unlock(dst_map);
10120                                 return(KERN_NO_SPACE);
10121                         }
10122                         start = last->vme_start;
10123                 } else {
10124                         start = last->vme_end;
10125                 }
10126                 start = vm_map_round_page(start,
10127                                           VM_MAP_PAGE_MASK(dst_map));
10128         }
10129
10130         if (dst_map->holelistenabled) {
10131                 if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) {
10132                         panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last, (unsigned long long)last->vme_start);
10133                 }
10134         }
10135
10136
10137         adjustment = start - vm_copy_start;
10138         if (! consume_on_success) {
10139                 /*
10140                  * We're not allowed to consume "copy", so we'll have to
10141                  * copy its map entries into the destination map below.
10142                  * No need to re-allocate map entries from the correct
10143                  * (pageable or not) zone, since we'll get new map entries
10144                  * during the transfer.
10145                  * We'll also adjust the map entries's "start" and "end"
10146                  * during the transfer, to keep "copy"'s entries consistent
10147                  * with its "offset".
10148                  */
10149                 goto after_adjustments;
10150         }
10151
10152         /*
10153          *      Since we're going to just drop the map
10154          *      entries from the copy into the destination
10155          *      map, they must come from the same pool.
10156          */
10157
10158         if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
10159                 /*
10160                  * Mismatches occur when dealing with the default
10161                  * pager.
10162                  */
10163                 zone_t          old_zone;
10164                 vm_map_entry_t  next, new;
10165
10166                 /*
10167                  * Find the zone that the copies were allocated from
10168                  */
10169
10170                 entry = vm_map_copy_first_entry(copy);
10171
10172                 /*
10173                  * Reinitialize the copy so that vm_map_copy_entry_link
10174                  * will work.
10175                  */
10176                 vm_map_store_copy_reset(copy, entry);
10177                 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
10178
10179                 /*
10180                  * Copy each entry.
10181                  */
10182                 while (entry != vm_map_copy_to_entry(copy)) {
10183                         new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
10184                         vm_map_entry_copy_full(new, entry);
10185                         assert(!new->iokit_acct);
10186                         if (new->is_sub_map) {
10187                                 /* clr address space specifics */
10188                                 new->use_pmap = FALSE;
10189                         }
10190                         vm_map_copy_entry_link(copy,
10191                                                vm_map_copy_last_entry(copy),
10192                                                new);
10193                         next = entry->vme_next;
10194                         old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
10195                         zfree(old_zone, entry);
10196                         entry = next;
10197                 }
10198         }
10199
10200         /*
10201          *      Adjust the addresses in the copy chain, and
10202          *      reset the region attributes.
10203          */
10204
10205         for (entry = vm_map_copy_first_entry(copy);
10206              entry != vm_map_copy_to_entry(copy);
10207              entry = entry->vme_next) {
10208                 if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
10209                         /*
10210                          * We're injecting this copy entry into a map that
10211                          * has the standard page alignment, so clear
10212                          * "map_aligned" (which might have been inherited
10213                          * from the original map entry).
10214                          */
10215                         entry->map_aligned = FALSE;
10216                 }
10217
10218                 entry->vme_start += adjustment;
10219                 entry->vme_end += adjustment;
10220
10221                 if (entry->map_aligned) {
10222                         assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
10223                                                    VM_MAP_PAGE_MASK(dst_map)));
10224                         assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
10225                                                    VM_MAP_PAGE_MASK(dst_map)));
10226                 }
10227
10228                 entry->inheritance = VM_INHERIT_DEFAULT;
10229                 entry->protection = VM_PROT_DEFAULT;
10230                 entry->max_protection = VM_PROT_ALL;
10231                 entry->behavior = VM_BEHAVIOR_DEFAULT;
10232
10233                 /*
10234                  * If the entry is now wired,
10235                  * map the pages into the destination map.
10236                  */
10237                 if (entry->wired_count != 0) {
10238                         vm_map_offset_t va;
10239                         vm_object_offset_t       offset;
10240                         vm_object_t object;
10241                         vm_prot_t prot;
10242                         int     type_of_fault;
10243
10244                         object = VME_OBJECT(entry);
10245                         offset = VME_OFFSET(entry);
10246                         va = entry->vme_start;
10247
10248                         pmap_pageable(dst_map->pmap,
10249                                       entry->vme_start,
10250                                       entry->vme_end,
10251                                       TRUE);
10252
10253                         while (va < entry->vme_end) {
10254                                 vm_page_t       m;
10255
10256                                 /*
10257                                  * Look up the page in the object.
10258                                  * Assert that the page will be found in the
10259                                  * top object:
10260                                  * either
10261                                  *      the object was newly created by
10262                                  *      vm_object_copy_slowly, and has
10263                                  *      copies of all of the pages from
10264                                  *      the source object
10265                                  * or
10266                                  *      the object was moved from the old
10267                                  *      map entry; because the old map
10268                                  *      entry was wired, all of the pages
10269                                  *      were in the top-level object.
10270                                  *      (XXX not true if we wire pages for
10271                                  *       reading)
10272                                  */
10273                                 vm_object_lock(object);
10274
10275                                 m = vm_page_lookup(object, offset);
10276                                 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
10277                                     m->absent)
10278                                         panic("vm_map_copyout: wiring %p", m);
10279
10280                                 prot = entry->protection;
10281
10282                                 if (override_nx(dst_map, VME_ALIAS(entry)) &&
10283                                     prot)
10284                                         prot |= VM_PROT_EXECUTE;
10285
10286                                 type_of_fault = DBG_CACHE_HIT_FAULT;
10287
10288                                 vm_fault_enter(m, dst_map->pmap, va, prot, prot,
10289                                                                 VM_PAGE_WIRED(m),
10290                                                                 FALSE, /* change_wiring */
10291                                                                 VM_KERN_MEMORY_NONE, /* tag - not wiring */
10292                                                                 FALSE, /* no_cache */
10293                                                                 FALSE, /* cs_bypass */
10294                                                                 VME_ALIAS(entry),
10295                                                                 ((entry->iokit_acct ||
10296                                                                  (!entry->is_sub_map &&
10297                                                                   !entry->use_pmap))
10298                                                                 ? PMAP_OPTIONS_ALT_ACCT
10299                                                                 : 0),  /* pmap_options */
10300                                                                 NULL,  /* need_retry */
10301                                                                 &type_of_fault);
10302
10303                                 vm_object_unlock(object);
10304
10305                                 offset += PAGE_SIZE_64;
10306                                 va += PAGE_SIZE;
10307                         }
10308                 }
10309         }
10310
10311 after_adjustments:
10312
10313         /*
10314          *      Correct the page alignment for the result
10315          */
10316
10317         *dst_addr = start + (copy->offset - vm_copy_start);
10318
10319 #if KASAN
10320         kasan_notify_address(*dst_addr, size);
10321 #endif
10322
10323         /*
10324          *      Update the hints and the map size
10325          */
10326
10327         if (consume_on_success) {
10328                 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
10329         } else {
10330                 SAVE_HINT_MAP_WRITE(dst_map, last);
10331         }
10332
10333         dst_map->size += size;
10334
10335         /*
10336          *      Link in the copy
10337          */
10338
10339         if (consume_on_success) {
10340                 vm_map_copy_insert(dst_map, last, copy);
10341         } else {
10342                 vm_map_copy_remap(dst_map, last, copy, adjustment,
10343                                   cur_protection, max_protection,
10344                                   inheritance);
10345         }
10346
10347         vm_map_unlock(dst_map);
10348
10349         /*
10350          * XXX  If wiring_required, call vm_map_pageable
10351          */
10352
10353         return(KERN_SUCCESS);
10354 }
10355
10356 /*
10357  *      Routine:        vm_map_copyin
10358  *
10359  *      Description:
10360  *              see vm_map_copyin_common.  Exported via Unsupported.exports.
10361  *
10362  */
10363
10364 #undef vm_map_copyin
10365
10366 kern_return_t
10367 vm_map_copyin(
10368         vm_map_t                        src_map,
10369         vm_map_address_t        src_addr,
10370         vm_map_size_t           len,
10371         boolean_t                       src_destroy,
10372         vm_map_copy_t           *copy_result)   /* OUT */
10373 {
10374         return(vm_map_copyin_common(src_map, src_addr, len, src_destroy,
10375                                         FALSE, copy_result, FALSE));
10376 }
10377
10378 /*
10379  *      Routine:        vm_map_copyin_common
10380  *
10381  *      Description:
10382  *              Copy the specified region (src_addr, len) from the
10383  *              source address space (src_map), possibly removing
10384  *              the region from the source address space (src_destroy).
10385  *
10386  *      Returns:
10387  *              A vm_map_copy_t object (copy_result), suitable for
10388  *              insertion into another address space (using vm_map_copyout)
10389  *              or for copying over another address space region (using
10390  *              vm_map_copy_overwrite).  If the copy is unused, it
10391  *              should be destroyed (using vm_map_copy_discard).
10392  *
10393  *      In/out conditions:
10394  *              The source map should not be locked on entry.
10395  */
10396
10397 typedef struct submap_map {
10398         vm_map_t        parent_map;
10399         vm_map_offset_t base_start;
10400         vm_map_offset_t base_end;
10401         vm_map_size_t   base_len;
10402         struct submap_map *next;
10403 } submap_map_t;
10404
10405 kern_return_t
10406 vm_map_copyin_common(
10407         vm_map_t        src_map,
10408         vm_map_address_t src_addr,
10409         vm_map_size_t   len,
10410         boolean_t       src_destroy,
10411         __unused boolean_t      src_volatile,
10412         vm_map_copy_t   *copy_result,   /* OUT */
10413         boolean_t       use_maxprot)
10414 {
10415         int flags;
10416
10417         flags = 0;
10418         if (src_destroy) {
10419                 flags |= VM_MAP_COPYIN_SRC_DESTROY;
10420         }
10421         if (use_maxprot) {
10422                 flags |= VM_MAP_COPYIN_USE_MAXPROT;
10423         }
10424         return vm_map_copyin_internal(src_map,
10425                                       src_addr,
10426                                       len,
10427                                       flags,
10428                                       copy_result);
10429 }
10430 kern_return_t
10431 vm_map_copyin_internal(
10432         vm_map_t        src_map,
10433         vm_map_address_t src_addr,
10434         vm_map_size_t   len,
10435         int             flags,
10436         vm_map_copy_t   *copy_result)   /* OUT */
10437 {
10438         vm_map_entry_t  tmp_entry;      /* Result of last map lookup --
10439                                          * in multi-level lookup, this
10440                                          * entry contains the actual
10441                                          * vm_object/offset.
10442                                          */
10443         vm_map_entry_t  new_entry = VM_MAP_ENTRY_NULL;  /* Map entry for copy */
10444
10445         vm_map_offset_t src_start;      /* Start of current entry --
10446                                          * where copy is taking place now
10447                                          */
10448         vm_map_offset_t src_end;        /* End of entire region to be
10449                                          * copied */
10450         vm_map_offset_t src_base;
10451         vm_map_t        base_map = src_map;
10452         boolean_t       map_share=FALSE;
10453         submap_map_t    *parent_maps = NULL;
10454
10455         vm_map_copy_t   copy;           /* Resulting copy */
10456         vm_map_address_t copy_addr;
10457         vm_map_size_t   copy_size;
10458         boolean_t       src_destroy;
10459         boolean_t       use_maxprot;
10460         boolean_t       preserve_purgeable;
10461         boolean_t       entry_was_shared;
10462         vm_map_entry_t  saved_src_entry;
10463
10464         if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
10465                 return KERN_INVALID_ARGUMENT;
10466         }
10467
10468         src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
10469         use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
10470         preserve_purgeable =
10471                 (flags & VM_MAP_COPYIN_PRESERVE_PURGEABLE) ? TRUE : FALSE;
10472
10473         /*
10474          *      Check for copies of zero bytes.
10475          */
10476
10477         if (len == 0) {
10478                 *copy_result = VM_MAP_COPY_NULL;
10479                 return(KERN_SUCCESS);
10480         }
10481
10482         /*
10483          *      Check that the end address doesn't overflow
10484          */
10485         src_end = src_addr + len;
10486         if (src_end < src_addr)
10487                 return KERN_INVALID_ADDRESS;
10488
10489         /*
10490          *      Compute (page aligned) start and end of region
10491          */
10492         src_start = vm_map_trunc_page(src_addr,
10493                                       VM_MAP_PAGE_MASK(src_map));
10494         src_end = vm_map_round_page(src_end,
10495                                     VM_MAP_PAGE_MASK(src_map));
10496
10497         /*
10498          * If the copy is sufficiently small, use a kernel buffer instead
10499          * of making a virtual copy.  The theory being that the cost of
10500          * setting up VM (and taking C-O-W faults) dominates the copy costs
10501          * for small regions.
10502          */
10503         if ((len < msg_ool_size_small) &&
10504             !use_maxprot &&
10505             !preserve_purgeable &&
10506             !(flags & VM_MAP_COPYIN_ENTRY_LIST) &&
10507             /*
10508              * Since the "msg_ool_size_small" threshold was increased and
10509              * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
10510              * address space limits, we revert to doing a virtual copy if the
10511              * copied range goes beyond those limits.  Otherwise, mach_vm_read()
10512              * of the commpage would now fail when it used to work.
10513              */
10514             (src_start >= vm_map_min(src_map) &&
10515              src_start < vm_map_max(src_map) &&
10516              src_end >= vm_map_min(src_map) &&
10517              src_end < vm_map_max(src_map)))
10518                 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
10519                                                    src_destroy, copy_result);
10520
10521         XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0);
10522
10523         /*
10524          *      Allocate a header element for the list.
10525          *
10526          *      Use the start and end in the header to
10527          *      remember the endpoints prior to rounding.
10528          */
10529
10530         copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
10531         copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
10532         vm_map_copy_first_entry(copy) =
10533                 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
10534         copy->type = VM_MAP_COPY_ENTRY_LIST;
10535         copy->cpy_hdr.nentries = 0;
10536         copy->cpy_hdr.entries_pageable = TRUE;
10537 #if 00
10538         copy->cpy_hdr.page_shift = src_map->hdr.page_shift;
10539 #else
10540         /*
10541          * The copy entries can be broken down for a variety of reasons,
10542          * so we can't guarantee that they will remain map-aligned...
10543          * Will need to adjust the first copy_entry's "vme_start" and
10544          * the last copy_entry's "vme_end" to be rounded to PAGE_MASK
10545          * rather than the original map's alignment.
10546          */
10547         copy->cpy_hdr.page_shift = PAGE_SHIFT;
10548 #endif
10549
10550         vm_map_store_init( &(copy->cpy_hdr) );
10551
10552         copy->offset = src_addr;
10553         copy->size = len;
10554
10555         new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
10556
10557 #define RETURN(x)                                               \
10558         MACRO_BEGIN                                             \
10559         vm_map_unlock(src_map);                                 \
10560         if(src_map != base_map)                                 \
10561                 vm_map_deallocate(src_map);                     \
10562         if (new_entry != VM_MAP_ENTRY_NULL)                     \
10563                 vm_map_copy_entry_dispose(copy,new_entry);      \
10564         vm_map_copy_discard(copy);                              \
10565         {                                                       \
10566                 submap_map_t    *_ptr;                          \
10567                                                                 \
10568                 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
10569                         parent_maps=parent_maps->next;          \
10570                         if (_ptr->parent_map != base_map)       \
10571                                 vm_map_deallocate(_ptr->parent_map);    \
10572                         kfree(_ptr, sizeof(submap_map_t));      \
10573                 }                                               \
10574         }                                                       \
10575         MACRO_RETURN(x);                                        \
10576         MACRO_END
10577
10578         /*
10579          *      Find the beginning of the region.
10580          */
10581
10582         vm_map_lock(src_map);
10583
10584         /*
10585          * Lookup the original "src_addr" rather than the truncated
10586          * "src_start", in case "src_start" falls in a non-map-aligned
10587          * map entry *before* the map entry that contains "src_addr"...
10588          */
10589         if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry))
10590                 RETURN(KERN_INVALID_ADDRESS);
10591         if(!tmp_entry->is_sub_map) {
10592                 /*
10593                  * ... but clip to the map-rounded "src_start" rather than
10594                  * "src_addr" to preserve map-alignment.  We'll adjust the
10595                  * first copy entry at the end, if needed.
10596                  */
10597                 vm_map_clip_start(src_map, tmp_entry, src_start);
10598         }
10599         if (src_start < tmp_entry->vme_start) {
10600                 /*
10601                  * Move "src_start" up to the start of the
10602                  * first map entry to copy.
10603                  */
10604                 src_start = tmp_entry->vme_start;
10605         }
10606         /* set for later submap fix-up */
10607         copy_addr = src_start;
10608
10609         /*
10610          *      Go through entries until we get to the end.
10611          */
10612
10613         while (TRUE) {
10614                 vm_map_entry_t  src_entry = tmp_entry;  /* Top-level entry */
10615                 vm_map_size_t   src_size;               /* Size of source
10616                                                          * map entry (in both
10617                                                          * maps)
10618                                                          */
10619
10620                 vm_object_t             src_object;     /* Object to copy */
10621                 vm_object_offset_t      src_offset;
10622
10623                 boolean_t       src_needs_copy;         /* Should source map
10624                                                          * be made read-only
10625                                                          * for copy-on-write?
10626                                                          */
10627
10628                 boolean_t       new_entry_needs_copy;   /* Will new entry be COW? */
10629
10630                 boolean_t       was_wired;              /* Was source wired? */
10631                 vm_map_version_t version;               /* Version before locks
10632                                                          * dropped to make copy
10633                                                          */
10634                 kern_return_t   result;                 /* Return value from
10635                                                          * copy_strategically.
10636                                                          */
10637                 while(tmp_entry->is_sub_map) {
10638                         vm_map_size_t submap_len;
10639                         submap_map_t *ptr;
10640
10641                         ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
10642                         ptr->next = parent_maps;
10643                         parent_maps = ptr;
10644                         ptr->parent_map = src_map;
10645                         ptr->base_start = src_start;
10646                         ptr->base_end = src_end;
10647                         submap_len = tmp_entry->vme_end - src_start;
10648                         if(submap_len > (src_end-src_start))
10649                                 submap_len = src_end-src_start;
10650                         ptr->base_len = submap_len;
10651
10652                         src_start -= tmp_entry->vme_start;
10653                         src_start += VME_OFFSET(tmp_entry);
10654                         src_end = src_start + submap_len;
10655                         src_map = VME_SUBMAP(tmp_entry);
10656                         vm_map_lock(src_map);
10657                         /* keep an outstanding reference for all maps in */
10658                         /* the parents tree except the base map */
10659                         vm_map_reference(src_map);
10660                         vm_map_unlock(ptr->parent_map);
10661                         if (!vm_map_lookup_entry(
10662                                     src_map, src_start, &tmp_entry))
10663                                 RETURN(KERN_INVALID_ADDRESS);
10664                         map_share = TRUE;
10665                         if(!tmp_entry->is_sub_map)
10666                                 vm_map_clip_start(src_map, tmp_entry, src_start);
10667                         src_entry = tmp_entry;
10668                 }
10669                 /* we are now in the lowest level submap... */
10670
10671                 if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
10672                     (VME_OBJECT(tmp_entry)->phys_contiguous)) {
10673                         /* This is not, supported for now.In future */
10674                         /* we will need to detect the phys_contig   */
10675                         /* condition and then upgrade copy_slowly   */
10676                         /* to do physical copy from the device mem  */
10677                         /* based object. We can piggy-back off of   */
10678                         /* the was wired boolean to set-up the      */
10679                         /* proper handling */
10680                         RETURN(KERN_PROTECTION_FAILURE);
10681                 }
10682                 /*
10683                  *      Create a new address map entry to hold the result.
10684                  *      Fill in the fields from the appropriate source entries.
10685                  *      We must unlock the source map to do this if we need
10686                  *      to allocate a map entry.
10687                  */
10688                 if (new_entry == VM_MAP_ENTRY_NULL) {
10689                         version.main_timestamp = src_map->timestamp;
10690                         vm_map_unlock(src_map);
10691
10692                         new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
10693
10694                         vm_map_lock(src_map);
10695                         if ((version.main_timestamp + 1) != src_map->timestamp) {
10696                                 if (!vm_map_lookup_entry(src_map, src_start,
10697                                                          &tmp_entry)) {
10698                                         RETURN(KERN_INVALID_ADDRESS);
10699                                 }
10700                                 if (!tmp_entry->is_sub_map)
10701                                         vm_map_clip_start(src_map, tmp_entry, src_start);
10702                                 continue; /* restart w/ new tmp_entry */
10703                         }
10704                 }
10705
10706                 /*
10707                  *      Verify that the region can be read.
10708                  */
10709                 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
10710                      !use_maxprot) ||
10711                     (src_entry->max_protection & VM_PROT_READ) == 0)
10712                         RETURN(KERN_PROTECTION_FAILURE);
10713
10714                 /*
10715                  *      Clip against the endpoints of the entire region.
10716                  */
10717
10718                 vm_map_clip_end(src_map, src_entry, src_end);
10719
10720                 src_size = src_entry->vme_end - src_start;
10721                 src_object = VME_OBJECT(src_entry);
10722                 src_offset = VME_OFFSET(src_entry);
10723                 was_wired = (src_entry->wired_count != 0);
10724
10725                 vm_map_entry_copy(new_entry, src_entry);
10726                 if (new_entry->is_sub_map) {
10727                         /* clr address space specifics */
10728                         new_entry->use_pmap = FALSE;
10729                 }
10730
10731                 /*
10732                  *      Attempt non-blocking copy-on-write optimizations.
10733                  */
10734
10735                 if (src_destroy &&
10736                     (src_object == VM_OBJECT_NULL ||
10737                      (src_object->internal &&
10738                       src_object->copy_strategy != MEMORY_OBJECT_COPY_DELAY &&
10739                       !src_object->true_share &&
10740                       !map_share))) {
10741                         /*
10742                          * If we are destroying the source, and the object
10743                          * is internal, we can move the object reference
10744                          * from the source to the copy.  The copy is
10745                          * copy-on-write only if the source is.
10746                          * We make another reference to the object, because
10747                          * destroying the source entry will deallocate it.
10748                          */
10749                         vm_object_reference(src_object);
10750
10751                         /*
10752                          * Copy is always unwired.  vm_map_copy_entry
10753                          * set its wired count to zero.
10754                          */
10755
10756                         goto CopySuccessful;
10757                 }
10758
10759
10760         RestartCopy:
10761                 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n",
10762                     src_object, new_entry, VME_OBJECT(new_entry),
10763                     was_wired, 0);
10764                 if ((src_object == VM_OBJECT_NULL ||
10765                      (!was_wired && !map_share && !tmp_entry->is_shared)) &&
10766                     vm_object_copy_quickly(
10767                             &VME_OBJECT(new_entry),
10768                             src_offset,
10769                             src_size,
10770                             &src_needs_copy,
10771                             &new_entry_needs_copy)) {
10772
10773                         new_entry->needs_copy = new_entry_needs_copy;
10774
10775                         /*
10776                          *      Handle copy-on-write obligations
10777                          */
10778
10779                         if (src_needs_copy && !tmp_entry->needs_copy) {
10780                                 vm_prot_t prot;
10781
10782                                 prot = src_entry->protection & ~VM_PROT_WRITE;
10783
10784                                 if (override_nx(src_map, VME_ALIAS(src_entry))
10785                                     && prot)
10786                                         prot |= VM_PROT_EXECUTE;
10787
10788                                 vm_object_pmap_protect(
10789                                         src_object,
10790                                         src_offset,
10791                                         src_size,
10792                                         (src_entry->is_shared ?
10793                                          PMAP_NULL
10794                                          : src_map->pmap),
10795                                         src_entry->vme_start,
10796                                         prot);
10797
10798                                 assert(tmp_entry->wired_count == 0);
10799                                 tmp_entry->needs_copy = TRUE;
10800                         }
10801
10802                         /*
10803                          *      The map has never been unlocked, so it's safe
10804                          *      to move to the next entry rather than doing
10805                          *      another lookup.
10806                          */
10807
10808                         goto CopySuccessful;
10809                 }
10810
10811                 entry_was_shared = tmp_entry->is_shared;
10812
10813                 /*
10814                  *      Take an object reference, so that we may
10815                  *      release the map lock(s).
10816                  */
10817
10818                 assert(src_object != VM_OBJECT_NULL);
10819                 vm_object_reference(src_object);
10820
10821                 /*
10822                  *      Record the timestamp for later verification.
10823                  *      Unlock the map.
10824                  */
10825
10826                 version.main_timestamp = src_map->timestamp;
10827                 vm_map_unlock(src_map); /* Increments timestamp once! */
10828                 saved_src_entry = src_entry;
10829                 tmp_entry = VM_MAP_ENTRY_NULL;
10830                 src_entry = VM_MAP_ENTRY_NULL;
10831
10832                 /*
10833                  *      Perform the copy
10834                  */
10835
10836                 if (was_wired) {
10837                 CopySlowly:
10838                         vm_object_lock(src_object);
10839                         result = vm_object_copy_slowly(
10840                                 src_object,
10841                                 src_offset,
10842                                 src_size,
10843                                 THREAD_UNINT,
10844                                 &VME_OBJECT(new_entry));
10845                         VME_OFFSET_SET(new_entry, 0);
10846                         new_entry->needs_copy = FALSE;
10847
10848                 }
10849                 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
10850                          (entry_was_shared  || map_share)) {
10851                         vm_object_t new_object;
10852
10853                         vm_object_lock_shared(src_object);
10854                         new_object = vm_object_copy_delayed(
10855                                 src_object,
10856                                 src_offset,
10857                                 src_size,
10858                                 TRUE);
10859                         if (new_object == VM_OBJECT_NULL)
10860                                 goto CopySlowly;
10861
10862                         VME_OBJECT_SET(new_entry, new_object);
10863                         assert(new_entry->wired_count == 0);
10864                         new_entry->needs_copy = TRUE;
10865                         assert(!new_entry->iokit_acct);
10866                         assert(new_object->purgable == VM_PURGABLE_DENY);
10867                         new_entry->use_pmap = TRUE;
10868                         result = KERN_SUCCESS;
10869
10870                 } else {
10871                         vm_object_offset_t new_offset;
10872                         new_offset = VME_OFFSET(new_entry);
10873                         result = vm_object_copy_strategically(src_object,
10874                                                               src_offset,
10875                                                               src_size,
10876                                                               &VME_OBJECT(new_entry),
10877                                                               &new_offset,
10878                                                               &new_entry_needs_copy);
10879                         if (new_offset != VME_OFFSET(new_entry)) {
10880                                 VME_OFFSET_SET(new_entry, new_offset);
10881                         }
10882
10883                         new_entry->needs_copy = new_entry_needs_copy;
10884                 }
10885
10886                 if (result == KERN_SUCCESS &&
10887                     preserve_purgeable &&
10888                     src_object->purgable != VM_PURGABLE_DENY) {
10889                         vm_object_t     new_object;
10890
10891                         new_object = VME_OBJECT(new_entry);
10892                         assert(new_object != src_object);
10893                         vm_object_lock(new_object);
10894                         assert(new_object->ref_count == 1);
10895                         assert(new_object->shadow == VM_OBJECT_NULL);
10896                         assert(new_object->copy == VM_OBJECT_NULL);
10897                         assert(new_object->vo_purgeable_owner == NULL);
10898
10899                         new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
10900                         new_object->true_share = TRUE;
10901                         /* start as non-volatile with no owner... */
10902                         new_object->purgable = VM_PURGABLE_NONVOLATILE;
10903                         vm_purgeable_nonvolatile_enqueue(new_object, NULL);
10904                         /* ... and move to src_object's purgeable state */
10905                         if (src_object->purgable != VM_PURGABLE_NONVOLATILE) {
10906                                 int state;
10907                                 state = src_object->purgable;
10908                                 vm_object_purgable_control(
10909                                         new_object,
10910                                         VM_PURGABLE_SET_STATE_FROM_KERNEL,
10911                                         &state);
10912                         }
10913                         vm_object_unlock(new_object);
10914                         new_object = VM_OBJECT_NULL;
10915                 }
10916
10917                 if (result != KERN_SUCCESS &&
10918                     result != KERN_MEMORY_RESTART_COPY) {
10919                         vm_map_lock(src_map);
10920                         RETURN(result);
10921                 }
10922
10923                 /*
10924                  *      Throw away the extra reference
10925                  */
10926
10927                 vm_object_deallocate(src_object);
10928
10929                 /*
10930                  *      Verify that the map has not substantially
10931                  *      changed while the copy was being made.
10932                  */
10933
10934                 vm_map_lock(src_map);
10935
10936                 if ((version.main_timestamp + 1) == src_map->timestamp) {
10937                         /* src_map hasn't changed: src_entry is still valid */
10938                         src_entry = saved_src_entry;
10939                         goto VerificationSuccessful;
10940                 }
10941
10942                 /*
10943                  *      Simple version comparison failed.
10944                  *
10945                  *      Retry the lookup and verify that the
10946                  *      same object/offset are still present.
10947                  *
10948                  *      [Note: a memory manager that colludes with
10949                  *      the calling task can detect that we have
10950                  *      cheated.  While the map was unlocked, the
10951                  *      mapping could have been changed and restored.]
10952                  */
10953
10954                 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
10955                         if (result != KERN_MEMORY_RESTART_COPY) {
10956                                 vm_object_deallocate(VME_OBJECT(new_entry));
10957                                 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
10958                                 assert(!new_entry->iokit_acct);
10959                                 new_entry->use_pmap = TRUE;
10960                         }
10961                         RETURN(KERN_INVALID_ADDRESS);
10962                 }
10963
10964                 src_entry = tmp_entry;
10965                 vm_map_clip_start(src_map, src_entry, src_start);
10966
10967                 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
10968                      !use_maxprot) ||
10969                     ((src_entry->max_protection & VM_PROT_READ) == 0))
10970                         goto VerificationFailed;
10971
10972                 if (src_entry->vme_end < new_entry->vme_end) {
10973                         /*
10974                          * This entry might have been shortened
10975                          * (vm_map_clip_end) or been replaced with
10976                          * an entry that ends closer to "src_start"
10977                          * than before.
10978                          * Adjust "new_entry" accordingly; copying
10979                          * less memory would be correct but we also
10980                          * redo the copy (see below) if the new entry
10981                          * no longer points at the same object/offset.
10982                          */
10983                         assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
10984                                                    VM_MAP_COPY_PAGE_MASK(copy)));
10985                         new_entry->vme_end = src_entry->vme_end;
10986                         src_size = new_entry->vme_end - src_start;
10987                 } else if (src_entry->vme_end > new_entry->vme_end) {
10988                         /*
10989                          * This entry might have been extended
10990                          * (vm_map_entry_simplify() or coalesce)
10991                          * or been replaced with an entry that ends farther
10992                          * from "src_start" than before.
10993                          *
10994                          * We've called vm_object_copy_*() only on
10995                          * the previous <start:end> range, so we can't
10996                          * just extend new_entry.  We have to re-do
10997                          * the copy based on the new entry as if it was
10998                          * pointing at a different object/offset (see
10999                          * "Verification failed" below).
11000                          */
11001                 }
11002
11003                 if ((VME_OBJECT(src_entry) != src_object) ||
11004                     (VME_OFFSET(src_entry) != src_offset) ||
11005                     (src_entry->vme_end > new_entry->vme_end)) {
11006
11007                         /*
11008                          *      Verification failed.
11009                          *
11010                          *      Start over with this top-level entry.
11011                          */
11012
11013                 VerificationFailed: ;
11014
11015                         vm_object_deallocate(VME_OBJECT(new_entry));
11016                         tmp_entry = src_entry;
11017                         continue;
11018                 }
11019
11020                 /*
11021                  *      Verification succeeded.
11022                  */
11023
11024         VerificationSuccessful: ;
11025
11026                 if (result == KERN_MEMORY_RESTART_COPY)
11027                         goto RestartCopy;
11028
11029                 /*
11030                  *      Copy succeeded.
11031                  */
11032
11033         CopySuccessful: ;
11034
11035                 /*
11036                  *      Link in the new copy entry.
11037                  */
11038
11039                 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
11040                                        new_entry);
11041
11042                 /*
11043                  *      Determine whether the entire region
11044                  *      has been copied.
11045                  */
11046                 src_base = src_start;
11047                 src_start = new_entry->vme_end;
11048                 new_entry = VM_MAP_ENTRY_NULL;
11049                 while ((src_start >= src_end) && (src_end != 0)) {
11050                         submap_map_t    *ptr;
11051
11052                         if (src_map == base_map) {
11053                                 /* back to the top */
11054                                 break;
11055                         }
11056
11057                         ptr = parent_maps;
11058                         assert(ptr != NULL);
11059                         parent_maps = parent_maps->next;
11060
11061                         /* fix up the damage we did in that submap */
11062                         vm_map_simplify_range(src_map,
11063                                               src_base,
11064                                               src_end);
11065
11066                         vm_map_unlock(src_map);
11067                         vm_map_deallocate(src_map);
11068                         vm_map_lock(ptr->parent_map);
11069                         src_map = ptr->parent_map;
11070                         src_base = ptr->base_start;
11071                         src_start = ptr->base_start + ptr->base_len;
11072                         src_end = ptr->base_end;
11073                         if (!vm_map_lookup_entry(src_map,
11074                                                  src_start,
11075                                                  &tmp_entry) &&
11076                             (src_end > src_start)) {
11077                                 RETURN(KERN_INVALID_ADDRESS);
11078                         }
11079                         kfree(ptr, sizeof(submap_map_t));
11080                         if (parent_maps == NULL)
11081                                 map_share = FALSE;
11082                         src_entry = tmp_entry->vme_prev;
11083                 }
11084
11085                 if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
11086                     (src_start >= src_addr + len) &&
11087                     (src_addr + len != 0)) {
11088                         /*
11089                          * Stop copying now, even though we haven't reached
11090                          * "src_end".  We'll adjust the end of the last copy
11091                          * entry at the end, if needed.
11092                          *
11093                          * If src_map's aligment is different from the
11094                          * system's page-alignment, there could be
11095                          * extra non-map-aligned map entries between
11096                          * the original (non-rounded) "src_addr + len"
11097                          * and the rounded "src_end".
11098                          * We do not want to copy those map entries since
11099                          * they're not part of the copied range.
11100                          */
11101                         break;
11102                 }
11103
11104                 if ((src_start >= src_end) && (src_end != 0))
11105                         break;
11106
11107                 /*
11108                  *      Verify that there are no gaps in the region
11109                  */
11110
11111                 tmp_entry = src_entry->vme_next;
11112                 if ((tmp_entry->vme_start != src_start) ||
11113                     (tmp_entry == vm_map_to_entry(src_map))) {
11114                         RETURN(KERN_INVALID_ADDRESS);
11115                 }
11116         }
11117
11118         /*
11119          * If the source should be destroyed, do it now, since the
11120          * copy was successful.
11121          */
11122         if (src_destroy) {
11123                 (void) vm_map_delete(
11124                         src_map,
11125                         vm_map_trunc_page(src_addr,
11126                                           VM_MAP_PAGE_MASK(src_map)),
11127                         src_end,
11128                         ((src_map == kernel_map) ?
11129                          VM_MAP_REMOVE_KUNWIRE :
11130                          VM_MAP_NO_FLAGS),
11131                         VM_MAP_NULL);
11132         } else {
11133                 /* fix up the damage we did in the base map */
11134                 vm_map_simplify_range(
11135                         src_map,
11136                         vm_map_trunc_page(src_addr,
11137                                           VM_MAP_PAGE_MASK(src_map)),
11138                         vm_map_round_page(src_end,
11139                                           VM_MAP_PAGE_MASK(src_map)));
11140         }
11141
11142         vm_map_unlock(src_map);
11143         tmp_entry = VM_MAP_ENTRY_NULL;
11144
11145         if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) {
11146                 vm_map_offset_t original_start, original_offset, original_end;
11147
11148                 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
11149
11150                 /* adjust alignment of first copy_entry's "vme_start" */
11151                 tmp_entry = vm_map_copy_first_entry(copy);
11152                 if (tmp_entry != vm_map_copy_to_entry(copy)) {
11153                         vm_map_offset_t adjustment;
11154
11155                         original_start = tmp_entry->vme_start;
11156                         original_offset = VME_OFFSET(tmp_entry);
11157
11158                         /* map-align the start of the first copy entry... */
11159                         adjustment = (tmp_entry->vme_start -
11160                                       vm_map_trunc_page(
11161                                               tmp_entry->vme_start,
11162                                               VM_MAP_PAGE_MASK(src_map)));
11163                         tmp_entry->vme_start -= adjustment;
11164                         VME_OFFSET_SET(tmp_entry,
11165                                        VME_OFFSET(tmp_entry) - adjustment);
11166                         copy_addr -= adjustment;
11167                         assert(tmp_entry->vme_start < tmp_entry->vme_end);
11168                         /* ... adjust for mis-aligned start of copy range */
11169                         adjustment =
11170                                 (vm_map_trunc_page(copy->offset,
11171                                                    PAGE_MASK) -
11172                                  vm_map_trunc_page(copy->offset,
11173                                                    VM_MAP_PAGE_MASK(src_map)));
11174                         if (adjustment) {
11175                                 assert(page_aligned(adjustment));
11176                                 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
11177                                 tmp_entry->vme_start += adjustment;
11178                                 VME_OFFSET_SET(tmp_entry,
11179                                                (VME_OFFSET(tmp_entry) +
11180                                                 adjustment));
11181                                 copy_addr += adjustment;
11182                                 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11183                         }
11184
11185                         /*
11186                          * Assert that the adjustments haven't exposed
11187                          * more than was originally copied...
11188                          */
11189                         assert(tmp_entry->vme_start >= original_start);
11190                         assert(VME_OFFSET(tmp_entry) >= original_offset);
11191                         /*
11192                          * ... and that it did not adjust outside of a
11193                          * a single 16K page.
11194                          */
11195                         assert(vm_map_trunc_page(tmp_entry->vme_start,
11196                                                  VM_MAP_PAGE_MASK(src_map)) ==
11197                                vm_map_trunc_page(original_start,
11198                                                  VM_MAP_PAGE_MASK(src_map)));
11199                 }
11200
11201                 /* adjust alignment of last copy_entry's "vme_end" */
11202                 tmp_entry = vm_map_copy_last_entry(copy);
11203                 if (tmp_entry != vm_map_copy_to_entry(copy)) {
11204                         vm_map_offset_t adjustment;
11205
11206                         original_end = tmp_entry->vme_end;
11207
11208                         /* map-align the end of the last copy entry... */
11209                         tmp_entry->vme_end =
11210                                 vm_map_round_page(tmp_entry->vme_end,
11211                                                   VM_MAP_PAGE_MASK(src_map));
11212                         /* ... adjust for mis-aligned end of copy range */
11213                         adjustment =
11214                                 (vm_map_round_page((copy->offset +
11215                                                     copy->size),
11216                                                    VM_MAP_PAGE_MASK(src_map)) -
11217                                  vm_map_round_page((copy->offset +
11218                                                     copy->size),
11219                                                    PAGE_MASK));
11220                         if (adjustment) {
11221                                 assert(page_aligned(adjustment));
11222                                 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
11223                                 tmp_entry->vme_end -= adjustment;
11224                                 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11225                         }
11226
11227                         /*
11228                          * Assert that the adjustments haven't exposed
11229                          * more than was originally copied...
11230                          */
11231                         assert(tmp_entry->vme_end <= original_end);
11232                         /*
11233                          * ... and that it did not adjust outside of a
11234                          * a single 16K page.
11235                          */
11236                         assert(vm_map_round_page(tmp_entry->vme_end,
11237                                                  VM_MAP_PAGE_MASK(src_map)) ==
11238                                vm_map_round_page(original_end,
11239                                                  VM_MAP_PAGE_MASK(src_map)));
11240                 }
11241         }
11242
11243         /* Fix-up start and end points in copy.  This is necessary */
11244         /* when the various entries in the copy object were picked */
11245         /* up from different sub-maps */
11246
11247         tmp_entry = vm_map_copy_first_entry(copy);
11248         copy_size = 0; /* compute actual size */
11249         while (tmp_entry != vm_map_copy_to_entry(copy)) {
11250                 assert(VM_MAP_PAGE_ALIGNED(
11251                                copy_addr + (tmp_entry->vme_end -
11252                                             tmp_entry->vme_start),
11253                                VM_MAP_COPY_PAGE_MASK(copy)));
11254                 assert(VM_MAP_PAGE_ALIGNED(
11255                                copy_addr,
11256                                VM_MAP_COPY_PAGE_MASK(copy)));
11257
11258                 /*
11259                  * The copy_entries will be injected directly into the
11260                  * destination map and might not be "map aligned" there...
11261                  */
11262                 tmp_entry->map_aligned = FALSE;
11263
11264                 tmp_entry->vme_end = copy_addr +
11265                         (tmp_entry->vme_end - tmp_entry->vme_start);
11266                 tmp_entry->vme_start = copy_addr;
11267                 assert(tmp_entry->vme_start < tmp_entry->vme_end);
11268                 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
11269                 copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
11270                 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
11271         }
11272
11273         if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
11274             copy_size < copy->size) {
11275                 /*
11276                  * The actual size of the VM map copy is smaller than what
11277                  * was requested by the caller.  This must be because some
11278                  * PAGE_SIZE-sized pages are missing at the end of the last
11279                  * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
11280                  * The caller might not have been aware of those missing
11281                  * pages and might not want to be aware of it, which is
11282                  * fine as long as they don't try to access (and crash on)
11283                  * those missing pages.
11284                  * Let's adjust the size of the "copy", to avoid failing
11285                  * in vm_map_copyout() or vm_map_copy_overwrite().
11286                  */
11287                 assert(vm_map_round_page(copy_size,
11288                                          VM_MAP_PAGE_MASK(src_map)) ==
11289                        vm_map_round_page(copy->size,
11290                                          VM_MAP_PAGE_MASK(src_map)));
11291                 copy->size = copy_size;
11292         }
11293
11294         *copy_result = copy;
11295         return(KERN_SUCCESS);
11296
11297 #undef  RETURN
11298 }
11299
11300 kern_return_t
11301 vm_map_copy_extract(
11302         vm_map_t                src_map,
11303         vm_map_address_t        src_addr,
11304         vm_map_size_t           len,
11305         vm_map_copy_t           *copy_result,   /* OUT */
11306         vm_prot_t               *cur_prot,      /* OUT */
11307         vm_prot_t               *max_prot)
11308 {
11309         vm_map_offset_t src_start, src_end;
11310         vm_map_copy_t   copy;
11311         kern_return_t   kr;
11312
11313         /*
11314          *      Check for copies of zero bytes.
11315          */
11316
11317         if (len == 0) {
11318                 *copy_result = VM_MAP_COPY_NULL;
11319                 return(KERN_SUCCESS);
11320         }
11321
11322         /*
11323          *      Check that the end address doesn't overflow
11324          */
11325         src_end = src_addr + len;
11326         if (src_end < src_addr)
11327                 return KERN_INVALID_ADDRESS;
11328
11329         /*
11330          *      Compute (page aligned) start and end of region
11331          */
11332         src_start = vm_map_trunc_page(src_addr, PAGE_MASK);
11333         src_end = vm_map_round_page(src_end, PAGE_MASK);
11334
11335         /*
11336          *      Allocate a header element for the list.
11337          *
11338          *      Use the start and end in the header to
11339          *      remember the endpoints prior to rounding.
11340          */
11341
11342         copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
11343         copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
11344         vm_map_copy_first_entry(copy) =
11345                 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
11346         copy->type = VM_MAP_COPY_ENTRY_LIST;
11347         copy->cpy_hdr.nentries = 0;
11348         copy->cpy_hdr.entries_pageable = TRUE;
11349
11350         vm_map_store_init(&copy->cpy_hdr);
11351
11352         copy->offset = 0;
11353         copy->size = len;
11354
11355         kr = vm_map_remap_extract(src_map,
11356                                   src_addr,
11357                                   len,
11358                                   FALSE, /* copy */
11359                                   &copy->cpy_hdr,
11360                                   cur_prot,
11361                                   max_prot,
11362                                   VM_INHERIT_SHARE,
11363                                   TRUE, /* pageable */
11364                                   FALSE, /* same_map */
11365                                   VM_MAP_KERNEL_FLAGS_NONE);
11366         if (kr != KERN_SUCCESS) {
11367                 vm_map_copy_discard(copy);
11368                 return kr;
11369         }
11370
11371         *copy_result = copy;
11372         return KERN_SUCCESS;
11373 }
11374
11375 /*
11376  *      vm_map_copyin_object:
11377  *
11378  *      Create a copy object from an object.
11379  *      Our caller donates an object reference.
11380  */
11381
11382 kern_return_t
11383 vm_map_copyin_object(
11384         vm_object_t             object,
11385         vm_object_offset_t      offset, /* offset of region in object */
11386         vm_object_size_t        size,   /* size of region in object */
11387         vm_map_copy_t   *copy_result)   /* OUT */
11388 {
11389         vm_map_copy_t   copy;           /* Resulting copy */
11390
11391         /*
11392          *      We drop the object into a special copy object
11393          *      that contains the object directly.
11394          */
11395
11396         copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
11397         copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
11398         copy->type = VM_MAP_COPY_OBJECT;
11399         copy->cpy_object = object;
11400         copy->offset = offset;
11401         copy->size = size;
11402
11403         *copy_result = copy;
11404         return(KERN_SUCCESS);
11405 }
11406
11407 static void
11408 vm_map_fork_share(
11409         vm_map_t        old_map,
11410         vm_map_entry_t  old_entry,
11411         vm_map_t        new_map)
11412 {
11413         vm_object_t     object;
11414         vm_map_entry_t  new_entry;
11415
11416         /*
11417          *      New sharing code.  New map entry
11418          *      references original object.  Internal
11419          *      objects use asynchronous copy algorithm for
11420          *      future copies.  First make sure we have
11421          *      the right object.  If we need a shadow,
11422          *      or someone else already has one, then
11423          *      make a new shadow and share it.
11424          */
11425
11426         object = VME_OBJECT(old_entry);
11427         if (old_entry->is_sub_map) {
11428                 assert(old_entry->wired_count == 0);
11429 #ifndef NO_NESTED_PMAP
11430                 if(old_entry->use_pmap) {
11431                         kern_return_t   result;
11432
11433                         result = pmap_nest(new_map->pmap,
11434                                            (VME_SUBMAP(old_entry))->pmap,
11435                                            (addr64_t)old_entry->vme_start,
11436                                            (addr64_t)old_entry->vme_start,
11437                                            (uint64_t)(old_entry->vme_end - old_entry->vme_start));
11438                         if(result)
11439                                 panic("vm_map_fork_share: pmap_nest failed!");
11440                 }
11441 #endif  /* NO_NESTED_PMAP */
11442         } else if (object == VM_OBJECT_NULL) {
11443                 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
11444                                                             old_entry->vme_start));
11445                 VME_OFFSET_SET(old_entry, 0);
11446                 VME_OBJECT_SET(old_entry, object);
11447                 old_entry->use_pmap = TRUE;
11448         } else if (object->copy_strategy !=
11449                    MEMORY_OBJECT_COPY_SYMMETRIC) {
11450
11451                 /*
11452                  *      We are already using an asymmetric
11453                  *      copy, and therefore we already have
11454                  *      the right object.
11455                  */
11456
11457                 assert(! old_entry->needs_copy);
11458         }
11459         else if (old_entry->needs_copy ||       /* case 1 */
11460                  object->shadowed ||            /* case 2 */
11461                  (!object->true_share &&        /* case 3 */
11462                   !old_entry->is_shared &&
11463                   (object->vo_size >
11464                    (vm_map_size_t)(old_entry->vme_end -
11465                                    old_entry->vme_start)))) {
11466
11467                 /*
11468                  *      We need to create a shadow.
11469                  *      There are three cases here.
11470                  *      In the first case, we need to
11471                  *      complete a deferred symmetrical
11472                  *      copy that we participated in.
11473                  *      In the second and third cases,
11474                  *      we need to create the shadow so
11475                  *      that changes that we make to the
11476                  *      object do not interfere with
11477                  *      any symmetrical copies which
11478                  *      have occured (case 2) or which
11479                  *      might occur (case 3).
11480                  *
11481                  *      The first case is when we had
11482                  *      deferred shadow object creation
11483                  *      via the entry->needs_copy mechanism.
11484                  *      This mechanism only works when
11485                  *      only one entry points to the source
11486                  *      object, and we are about to create
11487                  *      a second entry pointing to the
11488                  *      same object. The problem is that
11489                  *      there is no way of mapping from
11490                  *      an object to the entries pointing
11491                  *      to it. (Deferred shadow creation
11492                  *      works with one entry because occurs
11493                  *      at fault time, and we walk from the
11494                  *      entry to the object when handling
11495                  *      the fault.)
11496                  *
11497                  *      The second case is when the object
11498                  *      to be shared has already been copied
11499                  *      with a symmetric copy, but we point
11500                  *      directly to the object without
11501                  *      needs_copy set in our entry. (This
11502                  *      can happen because different ranges
11503                  *      of an object can be pointed to by
11504                  *      different entries. In particular,
11505                  *      a single entry pointing to an object
11506                  *      can be split by a call to vm_inherit,
11507                  *      which, combined with task_create, can
11508                  *      result in the different entries
11509                  *      having different needs_copy values.)
11510                  *      The shadowed flag in the object allows
11511                  *      us to detect this case. The problem
11512                  *      with this case is that if this object
11513                  *      has or will have shadows, then we
11514                  *      must not perform an asymmetric copy
11515                  *      of this object, since such a copy
11516                  *      allows the object to be changed, which
11517                  *      will break the previous symmetrical
11518                  *      copies (which rely upon the object
11519                  *      not changing). In a sense, the shadowed
11520                  *      flag says "don't change this object".
11521                  *      We fix this by creating a shadow
11522                  *      object for this object, and sharing
11523                  *      that. This works because we are free
11524                  *      to change the shadow object (and thus
11525                  *      to use an asymmetric copy strategy);
11526                  *      this is also semantically correct,
11527                  *      since this object is temporary, and
11528                  *      therefore a copy of the object is
11529                  *      as good as the object itself. (This
11530                  *      is not true for permanent objects,
11531                  *      since the pager needs to see changes,
11532                  *      which won't happen if the changes
11533                  *      are made to a copy.)
11534                  *
11535                  *      The third case is when the object
11536                  *      to be shared has parts sticking
11537                  *      outside of the entry we're working
11538                  *      with, and thus may in the future
11539                  *      be subject to a symmetrical copy.
11540                  *      (This is a preemptive version of
11541                  *      case 2.)
11542                  */
11543                 VME_OBJECT_SHADOW(old_entry,
11544                                   (vm_map_size_t) (old_entry->vme_end -
11545                                                    old_entry->vme_start));
11546
11547                 /*
11548                  *      If we're making a shadow for other than
11549                  *      copy on write reasons, then we have
11550                  *      to remove write permission.
11551                  */
11552
11553                 if (!old_entry->needs_copy &&
11554                     (old_entry->protection & VM_PROT_WRITE)) {
11555                         vm_prot_t prot;
11556
11557                         assert(!pmap_has_prot_policy(old_entry->protection));
11558
11559                         prot = old_entry->protection & ~VM_PROT_WRITE;
11560
11561                         assert(!pmap_has_prot_policy(prot));
11562
11563                         if (override_nx(old_map, VME_ALIAS(old_entry)) && prot)
11564                                 prot |= VM_PROT_EXECUTE;
11565
11566
11567                         if (old_map->mapped_in_other_pmaps) {
11568                                 vm_object_pmap_protect(
11569                                         VME_OBJECT(old_entry),
11570                                         VME_OFFSET(old_entry),
11571                                         (old_entry->vme_end -
11572                                          old_entry->vme_start),
11573                                         PMAP_NULL,
11574                                         old_entry->vme_start,
11575                                         prot);
11576                         } else {
11577                                 pmap_protect(old_map->pmap,
11578                                              old_entry->vme_start,
11579                                              old_entry->vme_end,
11580                                              prot);
11581                         }
11582                 }
11583
11584                 old_entry->needs_copy = FALSE;
11585                 object = VME_OBJECT(old_entry);
11586         }
11587
11588
11589         /*
11590          *      If object was using a symmetric copy strategy,
11591          *      change its copy strategy to the default
11592          *      asymmetric copy strategy, which is copy_delay
11593          *      in the non-norma case and copy_call in the
11594          *      norma case. Bump the reference count for the
11595          *      new entry.
11596          */
11597
11598         if(old_entry->is_sub_map) {
11599                 vm_map_lock(VME_SUBMAP(old_entry));
11600                 vm_map_reference(VME_SUBMAP(old_entry));
11601                 vm_map_unlock(VME_SUBMAP(old_entry));
11602         } else {
11603                 vm_object_lock(object);
11604                 vm_object_reference_locked(object);
11605                 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
11606                         object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
11607                 }
11608                 vm_object_unlock(object);
11609         }
11610
11611         /*
11612          *      Clone the entry, using object ref from above.
11613          *      Mark both entries as shared.
11614          */
11615
11616         new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
11617                                                           * map or descendants */
11618         vm_map_entry_copy(new_entry, old_entry);
11619         old_entry->is_shared = TRUE;
11620         new_entry->is_shared = TRUE;
11621
11622         /*
11623          *      If old entry's inheritance is VM_INHERIT_NONE,
11624          *      the new entry is for corpse fork, remove the
11625          *      write permission from the new entry.
11626          */
11627         if (old_entry->inheritance == VM_INHERIT_NONE) {
11628
11629                 new_entry->protection &= ~VM_PROT_WRITE;
11630                 new_entry->max_protection &= ~VM_PROT_WRITE;
11631         }
11632
11633         /*
11634          *      Insert the entry into the new map -- we
11635          *      know we're inserting at the end of the new
11636          *      map.
11637          */
11638
11639         vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry);
11640
11641         /*
11642          *      Update the physical map
11643          */
11644
11645         if (old_entry->is_sub_map) {
11646                 /* Bill Angell pmap support goes here */
11647         } else {
11648                 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
11649                           old_entry->vme_end - old_entry->vme_start,
11650                           old_entry->vme_start);
11651         }
11652 }
11653
11654 static boolean_t
11655 vm_map_fork_copy(
11656         vm_map_t        old_map,
11657         vm_map_entry_t  *old_entry_p,
11658         vm_map_t        new_map,
11659         int             vm_map_copyin_flags)
11660 {
11661         vm_map_entry_t old_entry = *old_entry_p;
11662         vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
11663         vm_map_offset_t start = old_entry->vme_start;
11664         vm_map_copy_t copy;
11665         vm_map_entry_t last = vm_map_last_entry(new_map);
11666
11667         vm_map_unlock(old_map);
11668         /*
11669          *      Use maxprot version of copyin because we
11670          *      care about whether this memory can ever
11671          *      be accessed, not just whether it's accessible
11672          *      right now.
11673          */
11674         vm_map_copyin_flags |= VM_MAP_COPYIN_USE_MAXPROT;
11675         if (vm_map_copyin_internal(old_map, start, entry_size,
11676                                    vm_map_copyin_flags, &copy)
11677             != KERN_SUCCESS) {
11678                 /*
11679                  *      The map might have changed while it
11680                  *      was unlocked, check it again.  Skip
11681                  *      any blank space or permanently
11682                  *      unreadable region.
11683                  */
11684                 vm_map_lock(old_map);
11685                 if (!vm_map_lookup_entry(old_map, start, &last) ||
11686                     (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
11687                         last = last->vme_next;
11688                 }
11689                 *old_entry_p = last;
11690
11691                 /*
11692                  * XXX  For some error returns, want to
11693                  * XXX  skip to the next element.  Note
11694                  *      that INVALID_ADDRESS and
11695                  *      PROTECTION_FAILURE are handled above.
11696                  */
11697
11698                 return FALSE;
11699         }
11700
11701         /*
11702          *      Insert the copy into the new map
11703          */
11704
11705         vm_map_copy_insert(new_map, last, copy);
11706
11707         /*
11708          *      Pick up the traversal at the end of
11709          *      the copied region.
11710          */
11711
11712         vm_map_lock(old_map);
11713         start += entry_size;
11714         if (! vm_map_lookup_entry(old_map, start, &last)) {
11715                 last = last->vme_next;
11716         } else {
11717                 if (last->vme_start == start) {
11718                         /*
11719                          * No need to clip here and we don't
11720                          * want to cause any unnecessary
11721                          * unnesting...
11722                          */
11723                 } else {
11724                         vm_map_clip_start(old_map, last, start);
11725                 }
11726         }
11727         *old_entry_p = last;
11728
11729         return TRUE;
11730 }
11731
11732 /*
11733  *      vm_map_fork:
11734  *
11735  *      Create and return a new map based on the old
11736  *      map, according to the inheritance values on the
11737  *      regions in that map and the options.
11738  *
11739  *      The source map must not be locked.
11740  */
11741 vm_map_t
11742 vm_map_fork(
11743         ledger_t        ledger,
11744         vm_map_t        old_map,
11745         int             options)
11746 {
11747         pmap_t          new_pmap;
11748         vm_map_t        new_map;
11749         vm_map_entry_t  old_entry;
11750         vm_map_size_t   new_size = 0, entry_size;
11751         vm_map_entry_t  new_entry;
11752         boolean_t       src_needs_copy;
11753         boolean_t       new_entry_needs_copy;
11754         boolean_t       pmap_is64bit;
11755         int             vm_map_copyin_flags;
11756
11757         if (options & ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
11758                         VM_MAP_FORK_PRESERVE_PURGEABLE)) {
11759                 /* unsupported option */
11760                 return VM_MAP_NULL;
11761         }
11762
11763         pmap_is64bit =
11764 #if defined(__i386__) || defined(__x86_64__)
11765                                old_map->pmap->pm_task_map != TASK_MAP_32BIT;
11766 #elif defined(__arm64__)
11767                                old_map->pmap->max == MACH_VM_MAX_ADDRESS;
11768 #elif defined(__arm__)
11769                                FALSE;
11770 #else
11771 #error Unknown architecture.
11772 #endif
11773
11774         new_pmap = pmap_create(ledger, (vm_map_size_t) 0, pmap_is64bit);
11775
11776         vm_map_reference_swap(old_map);
11777         vm_map_lock(old_map);
11778
11779         new_map = vm_map_create(new_pmap,
11780                                 old_map->min_offset,
11781                                 old_map->max_offset,
11782                                 old_map->hdr.entries_pageable);
11783         vm_map_lock(new_map);
11784         vm_commit_pagezero_status(new_map);
11785         /* inherit the parent map's page size */
11786         vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
11787         for (
11788                 old_entry = vm_map_first_entry(old_map);
11789                 old_entry != vm_map_to_entry(old_map);
11790                 ) {
11791
11792                 entry_size = old_entry->vme_end - old_entry->vme_start;
11793
11794                 switch (old_entry->inheritance) {
11795                 case VM_INHERIT_NONE:
11796                         /*
11797                          * Skip making a share entry if VM_MAP_FORK_SHARE_IF_INHERIT_NONE
11798                          * is not passed or it is backed by a device pager.
11799                          */
11800                         if ((!(options & VM_MAP_FORK_SHARE_IF_INHERIT_NONE)) ||
11801                                 (!old_entry->is_sub_map &&
11802                                 VME_OBJECT(old_entry) != NULL &&
11803                                 VME_OBJECT(old_entry)->pager != NULL &&
11804                                 is_device_pager_ops(VME_OBJECT(old_entry)->pager->mo_pager_ops))) {
11805                                 break;
11806                         }
11807                         /* FALLTHROUGH */
11808
11809                 case VM_INHERIT_SHARE:
11810                         vm_map_fork_share(old_map, old_entry, new_map);
11811                         new_size += entry_size;
11812                         break;
11813
11814                 case VM_INHERIT_COPY:
11815
11816                         /*
11817                          *      Inline the copy_quickly case;
11818                          *      upon failure, fall back on call
11819                          *      to vm_map_fork_copy.
11820                          */
11821
11822                         if(old_entry->is_sub_map)
11823                                 break;
11824                         if ((old_entry->wired_count != 0) ||
11825                             ((VME_OBJECT(old_entry) != NULL) &&
11826                              (VME_OBJECT(old_entry)->true_share))) {
11827                                 goto slow_vm_map_fork_copy;
11828                         }
11829
11830                         new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
11831                         vm_map_entry_copy(new_entry, old_entry);
11832                         if (new_entry->is_sub_map) {
11833                                 /* clear address space specifics */
11834                                 new_entry->use_pmap = FALSE;
11835                         }
11836
11837                         if (! vm_object_copy_quickly(
11838                                     &VME_OBJECT(new_entry),
11839                                     VME_OFFSET(old_entry),
11840                                     (old_entry->vme_end -
11841                                      old_entry->vme_start),
11842                                     &src_needs_copy,
11843                                     &new_entry_needs_copy)) {
11844                                 vm_map_entry_dispose(new_map, new_entry);
11845                                 goto slow_vm_map_fork_copy;
11846                         }
11847
11848                         /*
11849                          *      Handle copy-on-write obligations
11850                          */
11851
11852                         if (src_needs_copy && !old_entry->needs_copy) {
11853                                 vm_prot_t prot;
11854
11855                                 assert(!pmap_has_prot_policy(old_entry->protection));
11856
11857                                 prot = old_entry->protection & ~VM_PROT_WRITE;
11858
11859                                 if (override_nx(old_map, VME_ALIAS(old_entry))
11860                                     && prot)
11861                                         prot |= VM_PROT_EXECUTE;
11862
11863                                 assert(!pmap_has_prot_policy(prot));
11864
11865                                 vm_object_pmap_protect(
11866                                         VME_OBJECT(old_entry),
11867                                         VME_OFFSET(old_entry),
11868                                         (old_entry->vme_end -
11869                                          old_entry->vme_start),
11870                                         ((old_entry->is_shared
11871                                           || old_map->mapped_in_other_pmaps)
11872                                          ? PMAP_NULL :
11873                                          old_map->pmap),
11874                                         old_entry->vme_start,
11875                                         prot);
11876
11877                                 assert(old_entry->wired_count == 0);
11878                                 old_entry->needs_copy = TRUE;
11879                         }
11880                         new_entry->needs_copy = new_entry_needs_copy;
11881
11882                         /*
11883                          *      Insert the entry at the end
11884                          *      of the map.
11885                          */
11886
11887                         vm_map_store_entry_link(new_map, vm_map_last_entry(new_map),
11888                                           new_entry);
11889                         new_size += entry_size;
11890                         break;
11891
11892                 slow_vm_map_fork_copy:
11893                         vm_map_copyin_flags = 0;
11894                         if (options & VM_MAP_FORK_PRESERVE_PURGEABLE) {
11895                                 vm_map_copyin_flags |=
11896                                         VM_MAP_COPYIN_PRESERVE_PURGEABLE;
11897                         }
11898                         if (vm_map_fork_copy(old_map,
11899                                              &old_entry,
11900                                              new_map,
11901                                              vm_map_copyin_flags)) {
11902                                 new_size += entry_size;
11903                         }
11904                         continue;
11905                 }
11906                 old_entry = old_entry->vme_next;
11907         }
11908
11909 #if defined(__arm64__)
11910         pmap_insert_sharedpage(new_map->pmap);
11911 #endif
11912
11913         new_map->size = new_size;
11914         vm_map_unlock(new_map);
11915         vm_map_unlock(old_map);
11916         vm_map_deallocate(old_map);
11917
11918         return(new_map);
11919 }
11920
11921 /*
11922  * vm_map_exec:
11923  *
11924  *      Setup the "new_map" with the proper execution environment according
11925  *      to the type of executable (platform, 64bit, chroot environment).
11926  *      Map the comm page and shared region, etc...
11927  */
11928 kern_return_t
11929 vm_map_exec(
11930         vm_map_t        new_map,
11931         task_t          task,
11932         boolean_t       is64bit,
11933         void            *fsroot,
11934         cpu_type_t      cpu)
11935 {
11936         SHARED_REGION_TRACE_DEBUG(
11937                 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n",
11938                  (void *)VM_KERNEL_ADDRPERM(current_task()),
11939                  (void *)VM_KERNEL_ADDRPERM(new_map),
11940                  (void *)VM_KERNEL_ADDRPERM(task),
11941                  (void *)VM_KERNEL_ADDRPERM(fsroot),
11942                  cpu));
11943         (void) vm_commpage_enter(new_map, task, is64bit);
11944         (void) vm_shared_region_enter(new_map, task, is64bit, fsroot, cpu);
11945         SHARED_REGION_TRACE_DEBUG(
11946                 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n",
11947                  (void *)VM_KERNEL_ADDRPERM(current_task()),
11948                  (void *)VM_KERNEL_ADDRPERM(new_map),
11949                  (void *)VM_KERNEL_ADDRPERM(task),
11950                  (void *)VM_KERNEL_ADDRPERM(fsroot),
11951                  cpu));
11952         return KERN_SUCCESS;
11953 }
11954
11955 /*
11956  *      vm_map_lookup_locked:
11957  *
11958  *      Finds the VM object, offset, and
11959  *      protection for a given virtual address in the
11960  *      specified map, assuming a page fault of the
11961  *      type specified.
11962  *
11963  *      Returns the (object, offset, protection) for
11964  *      this address, whether it is wired down, and whether
11965  *      this map has the only reference to the data in question.
11966  *      In order to later verify this lookup, a "version"
11967  *      is returned.
11968  *
11969  *      The map MUST be locked by the caller and WILL be
11970  *      locked on exit.  In order to guarantee the
11971  *      existence of the returned object, it is returned
11972  *      locked.
11973  *
11974  *      If a lookup is requested with "write protection"
11975  *      specified, the map may be changed to perform virtual
11976  *      copying operations, although the data referenced will
11977  *      remain the same.
11978  */
11979 kern_return_t
11980 vm_map_lookup_locked(
11981         vm_map_t                *var_map,       /* IN/OUT */
11982         vm_map_offset_t         vaddr,
11983         vm_prot_t               fault_type,
11984         int                     object_lock_type,
11985         vm_map_version_t        *out_version,   /* OUT */
11986         vm_object_t             *object,        /* OUT */
11987         vm_object_offset_t      *offset,        /* OUT */
11988         vm_prot_t               *out_prot,      /* OUT */
11989         boolean_t               *wired,         /* OUT */
11990         vm_object_fault_info_t  fault_info,     /* OUT */
11991         vm_map_t                *real_map)
11992 {
11993         vm_map_entry_t                  entry;
11994         vm_map_t                        map = *var_map;
11995         vm_map_t                        old_map = *var_map;
11996         vm_map_t                        cow_sub_map_parent = VM_MAP_NULL;
11997         vm_map_offset_t                 cow_parent_vaddr = 0;
11998         vm_map_offset_t                 old_start = 0;
11999         vm_map_offset_t                 old_end = 0;
12000         vm_prot_t                       prot;
12001         boolean_t                       mask_protections;
12002         boolean_t                       force_copy;
12003         vm_prot_t                       original_fault_type;
12004
12005         /*
12006          * VM_PROT_MASK means that the caller wants us to use "fault_type"
12007          * as a mask against the mapping's actual protections, not as an
12008          * absolute value.
12009          */
12010         mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
12011         force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
12012         fault_type &= VM_PROT_ALL;
12013         original_fault_type = fault_type;
12014
12015         *real_map = map;
12016
12017 RetryLookup:
12018         fault_type = original_fault_type;
12019
12020         /*
12021          *      If the map has an interesting hint, try it before calling
12022          *      full blown lookup routine.
12023          */
12024         entry = map->hint;
12025
12026         if ((entry == vm_map_to_entry(map)) ||
12027             (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
12028                 vm_map_entry_t  tmp_entry;
12029
12030                 /*
12031                  *      Entry was either not a valid hint, or the vaddr
12032                  *      was not contained in the entry, so do a full lookup.
12033                  */
12034                 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
12035                         if((cow_sub_map_parent) && (cow_sub_map_parent != map))
12036                                 vm_map_unlock(cow_sub_map_parent);
12037                         if((*real_map != map)
12038                            && (*real_map != cow_sub_map_parent))
12039                                 vm_map_unlock(*real_map);
12040                         return KERN_INVALID_ADDRESS;
12041                 }
12042
12043                 entry = tmp_entry;
12044         }
12045         if(map == old_map) {
12046                 old_start = entry->vme_start;
12047                 old_end = entry->vme_end;
12048         }
12049
12050         /*
12051          *      Handle submaps.  Drop lock on upper map, submap is
12052          *      returned locked.
12053          */
12054
12055 submap_recurse:
12056         if (entry->is_sub_map) {
12057                 vm_map_offset_t         local_vaddr;
12058                 vm_map_offset_t         end_delta;
12059                 vm_map_offset_t         start_delta;
12060                 vm_map_entry_t          submap_entry;
12061                 vm_prot_t               subentry_protection;
12062                 vm_prot_t               subentry_max_protection;
12063                 boolean_t               mapped_needs_copy=FALSE;
12064
12065                 local_vaddr = vaddr;
12066
12067                 if ((entry->use_pmap &&
12068                      ! ((fault_type & VM_PROT_WRITE) ||
12069                         force_copy))) {
12070                         /* if real_map equals map we unlock below */
12071                         if ((*real_map != map) &&
12072                             (*real_map != cow_sub_map_parent))
12073                                 vm_map_unlock(*real_map);
12074                         *real_map = VME_SUBMAP(entry);
12075                 }
12076
12077                 if(entry->needs_copy &&
12078                    ((fault_type & VM_PROT_WRITE) ||
12079                     force_copy)) {
12080                         if (!mapped_needs_copy) {
12081                                 if (vm_map_lock_read_to_write(map)) {
12082                                         vm_map_lock_read(map);
12083                                         *real_map = map;
12084                                         goto RetryLookup;
12085                                 }
12086                                 vm_map_lock_read(VME_SUBMAP(entry));
12087                                 *var_map = VME_SUBMAP(entry);
12088                                 cow_sub_map_parent = map;
12089                                 /* reset base to map before cow object */
12090                                 /* this is the map which will accept   */
12091                                 /* the new cow object */
12092                                 old_start = entry->vme_start;
12093                                 old_end = entry->vme_end;
12094                                 cow_parent_vaddr = vaddr;
12095                                 mapped_needs_copy = TRUE;
12096                         } else {
12097                                 vm_map_lock_read(VME_SUBMAP(entry));
12098                                 *var_map = VME_SUBMAP(entry);
12099                                 if((cow_sub_map_parent != map) &&
12100                                    (*real_map != map))
12101                                         vm_map_unlock(map);
12102                         }
12103                 } else {
12104                         vm_map_lock_read(VME_SUBMAP(entry));
12105                         *var_map = VME_SUBMAP(entry);
12106                         /* leave map locked if it is a target */
12107                         /* cow sub_map above otherwise, just  */
12108                         /* follow the maps down to the object */
12109                         /* here we unlock knowing we are not  */
12110                         /* revisiting the map.  */
12111                         if((*real_map != map) && (map != cow_sub_map_parent))
12112                                 vm_map_unlock_read(map);
12113                 }
12114
12115                 map = *var_map;
12116
12117                 /* calculate the offset in the submap for vaddr */
12118                 local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);
12119
12120         RetrySubMap:
12121                 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
12122                         if((cow_sub_map_parent) && (cow_sub_map_parent != map)){
12123                                 vm_map_unlock(cow_sub_map_parent);
12124                         }
12125                         if((*real_map != map)
12126                            && (*real_map != cow_sub_map_parent)) {
12127                                 vm_map_unlock(*real_map);
12128                         }
12129                         *real_map = map;
12130                         return KERN_INVALID_ADDRESS;
12131                 }
12132
12133                 /* find the attenuated shadow of the underlying object */
12134                 /* on our target map */
12135
12136                 /* in english the submap object may extend beyond the     */
12137                 /* region mapped by the entry or, may only fill a portion */
12138                 /* of it.  For our purposes, we only care if the object   */
12139                 /* doesn't fill.  In this case the area which will        */
12140                 /* ultimately be clipped in the top map will only need    */
12141                 /* to be as big as the portion of the underlying entry    */
12142                 /* which is mapped */
12143                 start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
12144                         submap_entry->vme_start - VME_OFFSET(entry) : 0;
12145
12146                 end_delta =
12147                         (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
12148                         submap_entry->vme_end ?
12149                         0 : (VME_OFFSET(entry) +
12150                              (old_end - old_start))
12151                         - submap_entry->vme_end;
12152
12153                 old_start += start_delta;
12154                 old_end -= end_delta;
12155
12156                 if(submap_entry->is_sub_map) {
12157                         entry = submap_entry;
12158                         vaddr = local_vaddr;
12159                         goto submap_recurse;
12160                 }
12161
12162                 if (((fault_type & VM_PROT_WRITE) ||
12163                      force_copy)
12164                     && cow_sub_map_parent) {
12165
12166                         vm_object_t     sub_object, copy_object;
12167                         vm_object_offset_t copy_offset;
12168                         vm_map_offset_t local_start;
12169                         vm_map_offset_t local_end;
12170                         boolean_t               copied_slowly = FALSE;
12171
12172                         if (vm_map_lock_read_to_write(map)) {
12173                                 vm_map_lock_read(map);
12174                                 old_start -= start_delta;
12175                                 old_end += end_delta;
12176                                 goto RetrySubMap;
12177                         }
12178
12179
12180                         sub_object = VME_OBJECT(submap_entry);
12181                         if (sub_object == VM_OBJECT_NULL) {
12182                                 sub_object =
12183                                         vm_object_allocate(
12184                                                 (vm_map_size_t)
12185                                                 (submap_entry->vme_end -
12186                                                  submap_entry->vme_start));
12187                                 VME_OBJECT_SET(submap_entry, sub_object);
12188                                 VME_OFFSET_SET(submap_entry, 0);
12189                         }
12190                         local_start =  local_vaddr -
12191                                 (cow_parent_vaddr - old_start);
12192                         local_end = local_vaddr +
12193                                 (old_end - cow_parent_vaddr);
12194                         vm_map_clip_start(map, submap_entry, local_start);
12195                         vm_map_clip_end(map, submap_entry, local_end);
12196                         if (submap_entry->is_sub_map) {
12197                                 /* unnesting was done when clipping */
12198                                 assert(!submap_entry->use_pmap);
12199                         }
12200
12201                         /* This is the COW case, let's connect */
12202                         /* an entry in our space to the underlying */
12203                         /* object in the submap, bypassing the  */
12204                         /* submap. */
12205
12206
12207                         if(submap_entry->wired_count != 0 ||
12208                            (sub_object->copy_strategy ==
12209                             MEMORY_OBJECT_COPY_NONE)) {
12210                                 vm_object_lock(sub_object);
12211                                 vm_object_copy_slowly(sub_object,
12212                                                       VME_OFFSET(submap_entry),
12213                                                       (submap_entry->vme_end -
12214                                                        submap_entry->vme_start),
12215                                                       FALSE,
12216                                                       &copy_object);
12217                                 copied_slowly = TRUE;
12218                         } else {
12219
12220                                 /* set up shadow object */
12221                                 copy_object = sub_object;
12222                                 vm_object_lock(sub_object);
12223                                 vm_object_reference_locked(sub_object);
12224                                 sub_object->shadowed = TRUE;
12225                                 vm_object_unlock(sub_object);
12226
12227                                 assert(submap_entry->wired_count == 0);
12228                                 submap_entry->needs_copy = TRUE;
12229
12230                                 prot = submap_entry->protection;
12231                                 assert(!pmap_has_prot_policy(prot));
12232                                 prot = prot & ~VM_PROT_WRITE;
12233                                 assert(!pmap_has_prot_policy(prot));
12234
12235                                 if (override_nx(old_map,
12236                                                 VME_ALIAS(submap_entry))
12237                                     && prot)
12238                                         prot |= VM_PROT_EXECUTE;
12239
12240                                 vm_object_pmap_protect(
12241                                         sub_object,
12242                                         VME_OFFSET(submap_entry),
12243                                         submap_entry->vme_end -
12244                                         submap_entry->vme_start,
12245                                         (submap_entry->is_shared
12246                                          || map->mapped_in_other_pmaps) ?
12247                                         PMAP_NULL : map->pmap,
12248                                         submap_entry->vme_start,
12249                                         prot);
12250                         }
12251
12252                         /*
12253                          * Adjust the fault offset to the submap entry.
12254                          */
12255                         copy_offset = (local_vaddr -
12256                                        submap_entry->vme_start +
12257                                        VME_OFFSET(submap_entry));
12258
12259                         /* This works differently than the  */
12260                         /* normal submap case. We go back  */
12261                         /* to the parent of the cow map and*/
12262                         /* clip out the target portion of  */
12263                         /* the sub_map, substituting the   */
12264                         /* new copy object,                */
12265
12266                         subentry_protection = submap_entry->protection;
12267                         subentry_max_protection = submap_entry->max_protection;
12268                         vm_map_unlock(map);
12269                         submap_entry = NULL; /* not valid after map unlock */
12270
12271                         local_start = old_start;
12272                         local_end = old_end;
12273                         map = cow_sub_map_parent;
12274                         *var_map = cow_sub_map_parent;
12275                         vaddr = cow_parent_vaddr;
12276                         cow_sub_map_parent = NULL;
12277
12278                         if(!vm_map_lookup_entry(map,
12279                                                 vaddr, &entry)) {
12280                                 vm_object_deallocate(
12281                                         copy_object);
12282                                 vm_map_lock_write_to_read(map);
12283                                 return KERN_INVALID_ADDRESS;
12284                         }
12285
12286                         /* clip out the portion of space */
12287                         /* mapped by the sub map which   */
12288                         /* corresponds to the underlying */
12289                         /* object */
12290
12291                         /*
12292                          * Clip (and unnest) the smallest nested chunk
12293                          * possible around the faulting address...
12294                          */
12295                         local_start = vaddr & ~(pmap_nesting_size_min - 1);
12296                         local_end = local_start + pmap_nesting_size_min;
12297                         /*
12298                          * ... but don't go beyond the "old_start" to "old_end"
12299                          * range, to avoid spanning over another VM region
12300                          * with a possibly different VM object and/or offset.
12301                          */
12302                         if (local_start < old_start) {
12303                                 local_start = old_start;
12304                         }
12305                         if (local_end > old_end) {
12306                                 local_end = old_end;
12307                         }
12308                         /*
12309                          * Adjust copy_offset to the start of the range.
12310                          */
12311                         copy_offset -= (vaddr - local_start);
12312
12313                         vm_map_clip_start(map, entry, local_start);
12314                         vm_map_clip_end(map, entry, local_end);
12315                         if (entry->is_sub_map) {
12316                                 /* unnesting was done when clipping */
12317                                 assert(!entry->use_pmap);
12318                         }
12319
12320                         /* substitute copy object for */
12321                         /* shared map entry           */
12322                         vm_map_deallocate(VME_SUBMAP(entry));
12323                         assert(!entry->iokit_acct);
12324                         entry->is_sub_map = FALSE;
12325                         entry->use_pmap = TRUE;
12326                         VME_OBJECT_SET(entry, copy_object);
12327
12328                         /* propagate the submap entry's protections */
12329                         entry->protection |= subentry_protection;
12330                         entry->max_protection |= subentry_max_protection;
12331
12332 #if CONFIG_EMBEDDED
12333                         if (entry->protection & VM_PROT_WRITE) {
12334                                 if ((entry->protection & VM_PROT_EXECUTE) && !(entry->used_for_jit)) {
12335                                         printf("EMBEDDED: %s can't have both write and exec at the same time\n", __FUNCTION__);
12336                                         entry->protection &= ~VM_PROT_EXECUTE;
12337                                 }
12338                         }
12339 #endif
12340
12341                         if(copied_slowly) {
12342                                 VME_OFFSET_SET(entry, local_start - old_start);
12343                                 entry->needs_copy = FALSE;
12344                                 entry->is_shared = FALSE;
12345                         } else {
12346                                 VME_OFFSET_SET(entry, copy_offset);
12347                                 assert(entry->wired_count == 0);
12348                                 entry->needs_copy = TRUE;
12349                                 if(entry->inheritance == VM_INHERIT_SHARE)
12350                                         entry->inheritance = VM_INHERIT_COPY;
12351                                 if (map != old_map)
12352                                         entry->is_shared = TRUE;
12353                         }
12354                         if(entry->inheritance == VM_INHERIT_SHARE)
12355                                 entry->inheritance = VM_INHERIT_COPY;
12356
12357                         vm_map_lock_write_to_read(map);
12358                 } else {
12359                         if((cow_sub_map_parent)
12360                            && (cow_sub_map_parent != *real_map)
12361                            && (cow_sub_map_parent != map)) {
12362                                 vm_map_unlock(cow_sub_map_parent);
12363                         }
12364                         entry = submap_entry;
12365                         vaddr = local_vaddr;
12366                 }
12367         }
12368
12369         /*
12370          *      Check whether this task is allowed to have
12371          *      this page.
12372          */
12373
12374         prot = entry->protection;
12375
12376         if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
12377                 /*
12378                  * HACK -- if not a stack, then allow execution
12379                  */
12380                 prot |= VM_PROT_EXECUTE;
12381         }
12382
12383         if (mask_protections) {
12384                 fault_type &= prot;
12385                 if (fault_type == VM_PROT_NONE) {
12386                         goto protection_failure;
12387                 }
12388         }
12389         if (((fault_type & prot) != fault_type)
12390 #if __arm64__
12391             /* prefetch abort in execute-only page */
12392             && !(prot == VM_PROT_EXECUTE && fault_type == (VM_PROT_READ | VM_PROT_EXECUTE))
12393 #endif
12394             ) {
12395         protection_failure:
12396                 if (*real_map != map) {
12397                         vm_map_unlock(*real_map);
12398                 }
12399                 *real_map = map;
12400
12401                 if ((fault_type & VM_PROT_EXECUTE) && prot)
12402                         log_stack_execution_failure((addr64_t)vaddr, prot);
12403
12404                 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
12405                 return KERN_PROTECTION_FAILURE;
12406         }
12407
12408         /*
12409          *      If this page is not pageable, we have to get
12410          *      it for all possible accesses.
12411          */
12412
12413         *wired = (entry->wired_count != 0);
12414         if (*wired)
12415                 fault_type = prot;
12416
12417         /*
12418          *      If the entry was copy-on-write, we either ...
12419          */
12420
12421         if (entry->needs_copy) {
12422                 /*
12423                  *      If we want to write the page, we may as well
12424                  *      handle that now since we've got the map locked.
12425                  *
12426                  *      If we don't need to write the page, we just
12427                  *      demote the permissions allowed.
12428                  */
12429
12430                 if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
12431                         /*
12432                          *      Make a new object, and place it in the
12433                          *      object chain.  Note that no new references
12434                          *      have appeared -- one just moved from the
12435                          *      map to the new object.
12436                          */
12437
12438                         if (vm_map_lock_read_to_write(map)) {
12439                                 vm_map_lock_read(map);
12440                                 goto RetryLookup;
12441                         }
12442
12443                         if (VME_OBJECT(entry)->shadowed == FALSE) {
12444                                 vm_object_lock(VME_OBJECT(entry));
12445                                 VME_OBJECT(entry)->shadowed = TRUE;
12446                                 vm_object_unlock(VME_OBJECT(entry));
12447                         }
12448                         VME_OBJECT_SHADOW(entry,
12449                                           (vm_map_size_t) (entry->vme_end -
12450                                                            entry->vme_start));
12451                         entry->needs_copy = FALSE;
12452
12453                         vm_map_lock_write_to_read(map);
12454                 }
12455                 if ((fault_type & VM_PROT_WRITE) == 0 && *wired == 0) {
12456                         /*
12457                          *      We're attempting to read a copy-on-write
12458                          *      page -- don't allow writes.
12459                          */
12460
12461                         prot &= (~VM_PROT_WRITE);
12462                 }
12463         }
12464
12465         /*
12466          *      Create an object if necessary.
12467          */
12468         if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
12469
12470                 if (vm_map_lock_read_to_write(map)) {
12471                         vm_map_lock_read(map);
12472                         goto RetryLookup;
12473                 }
12474
12475                 VME_OBJECT_SET(entry,
12476                                vm_object_allocate(
12477                                        (vm_map_size_t)(entry->vme_end -
12478                                                        entry->vme_start)));
12479                 VME_OFFSET_SET(entry, 0);
12480                 vm_map_lock_write_to_read(map);
12481         }
12482
12483         /*
12484          *      Return the object/offset from this entry.  If the entry
12485          *      was copy-on-write or empty, it has been fixed up.  Also
12486          *      return the protection.
12487          */
12488
12489         *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
12490         *object = VME_OBJECT(entry);
12491         *out_prot = prot;
12492
12493         if (fault_info) {
12494                 fault_info->interruptible = THREAD_UNINT; /* for now... */
12495                 /* ... the caller will change "interruptible" if needed */
12496                 fault_info->cluster_size = 0;
12497                 fault_info->user_tag = VME_ALIAS(entry);
12498                 fault_info->pmap_options = 0;
12499                 if (entry->iokit_acct ||
12500                     (!entry->is_sub_map && !entry->use_pmap)) {
12501                         fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
12502                 }
12503                 fault_info->behavior = entry->behavior;
12504                 fault_info->lo_offset = VME_OFFSET(entry);
12505                 fault_info->hi_offset =
12506                         (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
12507                 fault_info->no_cache  = entry->no_cache;
12508                 fault_info->stealth = FALSE;
12509                 fault_info->io_sync = FALSE;
12510                 if (entry->used_for_jit ||
12511                     entry->vme_resilient_codesign) {
12512                         fault_info->cs_bypass = TRUE;
12513                 } else {
12514                         fault_info->cs_bypass = FALSE;
12515                 }
12516                 fault_info->mark_zf_absent = FALSE;
12517                 fault_info->batch_pmap_op = FALSE;
12518         }
12519
12520         /*
12521          *      Lock the object to prevent it from disappearing
12522          */
12523         if (object_lock_type == OBJECT_LOCK_EXCLUSIVE)
12524                 vm_object_lock(*object);
12525         else
12526                 vm_object_lock_shared(*object);
12527
12528         /*
12529          *      Save the version number
12530          */
12531
12532         out_version->main_timestamp = map->timestamp;
12533
12534         return KERN_SUCCESS;
12535 }
12536
12537
12538 /*
12539  *      vm_map_verify:
12540  *
12541  *      Reports whether "map" still carries the timestamp that was
12542  *      saved in "version": TRUE if they match, FALSE otherwise.
12543  *      The caller must hold the map lock on entry ("shared" mode
12544  *      is sufficient); the map is still locked on return.
12545  */
12546 boolean_t
12547 vm_map_verify(
12548         vm_map_t                map,
12549         vm_map_version_t        *version)       /* REF */
12550 {
12551         /* caller contract: the map lock must be held */
12552         vm_map_lock_assert_held(map);
12553
12554         /* matching timestamps: same version as the saved one */
12555         return (map->timestamp ==
12556                 version->main_timestamp);
12557 }
12558
12559 /*
12560  *      TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
12561  *      Goes away after regular vm_region_recurse function migrates to
12562  *      64 bits
12563  *      vm_region_recurse: A form of vm_region which follows the
12564  *      submaps in a target map
12565  *
12566  */
12567
12568 #if DEVELOPMENT || DEBUG
12569 int vm_region_footprint = 0;    /* when set, vm_map_region_recurse_64() reports footprint numbers */
12570 #endif /* DEVELOPMENT || DEBUG */
12571
12572 kern_return_t
12573 vm_map_region_recurse_64(
12574         vm_map_t                 map,
12575         vm_map_offset_t *address,               /* IN/OUT */
12576         vm_map_size_t           *size,                  /* OUT */
12577         natural_t               *nesting_depth, /* IN/OUT */
12578         vm_region_submap_info_64_t      submap_info,    /* IN/OUT */
12579         mach_msg_type_number_t  *count) /* IN/OUT */
12580 {
12581         mach_msg_type_number_t  original_count;
12582         vm_region_extended_info_data_t  extended;
12583         vm_map_entry_t                  tmp_entry;
12584         vm_map_offset_t                 user_address;
12585         unsigned int                    user_max_depth;
12586
12587         /*
12588          * "curr_entry" is the VM map entry preceding or including the
12589          * address we're looking for.
12590          * "curr_map" is the map or sub-map containing "curr_entry".
12591          * "curr_address" is the equivalent of the top map's "user_address"
12592          * in the current map.
12593          * "curr_offset" is the cumulated offset of "curr_map" in the
12594          * target task's address space.
12595          * "curr_depth" is the depth of "curr_map" in the chain of
12596          * sub-maps.
12597          *
12598          * "curr_max_below" and "curr_max_above" limit the range (around
12599          * "curr_address") we should take into account in the current (sub)map.
12600          * They limit the range to what's visible through the map entries
12601          * we've traversed from the top map to the current map.
12602
12603          */
12604         vm_map_entry_t                  curr_entry;
12605         vm_map_address_t                curr_address;
12606         vm_map_offset_t                 curr_offset;
12607         vm_map_t                        curr_map;
12608         unsigned int                    curr_depth;
12609         vm_map_offset_t                 curr_max_below, curr_max_above;
12610         vm_map_offset_t                 curr_skip;
12611
12612         /*
12613          * "next_" is the same as "curr_" but for the VM region immediately
12614          * after the address we're looking for.  We need to keep track of this
12615          * too because we want to return info about that region if the
12616          * address we're looking for is not mapped.
12617          */
12618         vm_map_entry_t                  next_entry;
12619         vm_map_offset_t                 next_offset;
12620         vm_map_offset_t                 next_address;
12621         vm_map_t                        next_map;
12622         unsigned int                    next_depth;
12623         vm_map_offset_t                 next_max_below, next_max_above;
12624         vm_map_offset_t                 next_skip;
12625
12626         boolean_t                       look_for_pages;
12627         vm_region_submap_short_info_64_t short_info;
12628
12629         if (map == VM_MAP_NULL) {
12630                 /* no address space to work on */
12631                 return KERN_INVALID_ARGUMENT;
12632         }
12633
12634
12635         if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
12636                 /*
12637                  * "info" structure is not big enough and
12638                  * would overflow
12639                  */
12640                 return KERN_INVALID_ARGUMENT;
12641         }
12642
12643         original_count = *count;
12644
12645         if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
12646                 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
12647                 look_for_pages = FALSE;
12648                 short_info = (vm_region_submap_short_info_64_t) submap_info;
12649                 submap_info = NULL;
12650         } else {
12651                 look_for_pages = TRUE;
12652                 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
12653                 short_info = NULL;
12654
12655                 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
12656                         *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
12657                 }
12658         }
12659
12660         user_address = *address;
12661         user_max_depth = *nesting_depth;
12662
12663         if (not_in_kdp) {
12664                 vm_map_lock_read(map);
12665         }
12666
12667 recurse_again:
12668         curr_entry = NULL;
12669         curr_map = map;
12670         curr_address = user_address;
12671         curr_offset = 0;
12672         curr_skip = 0;
12673         curr_depth = 0;
12674         curr_max_above = ((vm_map_offset_t) -1) - curr_address;
12675         curr_max_below = curr_address;
12676
12677         next_entry = NULL;
12678         next_map = NULL;
12679         next_address = 0;
12680         next_offset = 0;
12681         next_skip = 0;
12682         next_depth = 0;
12683         next_max_above = (vm_map_offset_t) -1;
12684         next_max_below = (vm_map_offset_t) -1;
12685
12686         for (;;) {
12687                 if (vm_map_lookup_entry(curr_map,
12688                                         curr_address,
12689                                         &tmp_entry)) {
12690                         /* tmp_entry contains the address we're looking for */
12691                         curr_entry = tmp_entry;
12692                 } else {
12693                         vm_map_offset_t skip;
12694                         /*
12695                          * The address is not mapped.  "tmp_entry" is the
12696                          * map entry preceding the address.  We want the next
12697                          * one, if it exists.
12698                          */
12699                         curr_entry = tmp_entry->vme_next;
12700
12701                         if (curr_entry == vm_map_to_entry(curr_map) ||
12702                             (curr_entry->vme_start >=
12703                              curr_address + curr_max_above)) {
12704                                 /* no next entry at this level: stop looking */
12705                                 if (not_in_kdp) {
12706                                         vm_map_unlock_read(curr_map);
12707                                 }
12708                                 curr_entry = NULL;
12709                                 curr_map = NULL;
12710                                 curr_skip = 0;
12711                                 curr_offset = 0;
12712                                 curr_depth = 0;
12713                                 curr_max_above = 0;
12714                                 curr_max_below = 0;
12715                                 break;
12716                         }
12717
12718                         /* adjust current address and offset */
12719                         skip = curr_entry->vme_start - curr_address;
12720                         curr_address = curr_entry->vme_start;
12721                         curr_skip += skip;
12722                         curr_offset += skip;
12723                         curr_max_above -= skip;
12724                         curr_max_below = 0;
12725                 }
12726
12727                 /*
12728                  * Is the next entry at this level closer to the address (or
12729                  * deeper in the submap chain) than the one we had
12730                  * so far ?
12731                  */
12732                 tmp_entry = curr_entry->vme_next;
12733                 if (tmp_entry == vm_map_to_entry(curr_map)) {
12734                         /* no next entry at this level */
12735                 } else if (tmp_entry->vme_start >=
12736                            curr_address + curr_max_above) {
12737                         /*
12738                          * tmp_entry is beyond the scope of what we mapped of
12739                          * this submap in the upper level: ignore it.
12740                          */
12741                 } else if ((next_entry == NULL) ||
12742                            (tmp_entry->vme_start + curr_offset <=
12743                             next_entry->vme_start + next_offset)) {
12744                         /*
12745                          * We didn't have a "next_entry" or this one is
12746                          * closer to the address we're looking for:
12747                          * use this "tmp_entry" as the new "next_entry".
12748                          */
12749                         if (next_entry != NULL) {
12750                                 /* unlock the last "next_map" */
12751                                 if (next_map != curr_map && not_in_kdp) {
12752                                         vm_map_unlock_read(next_map);
12753                                 }
12754                         }
12755                         next_entry = tmp_entry;
12756                         next_map = curr_map;
12757                         next_depth = curr_depth;
12758                         next_address = next_entry->vme_start;
12759                         next_skip = curr_skip;
12760                         next_skip += (next_address - curr_address);
12761                         next_offset = curr_offset;
12762                         next_offset += (next_address - curr_address);
12763                         next_max_above = MIN(next_max_above, curr_max_above);
12764                         next_max_above = MIN(next_max_above,
12765                                              next_entry->vme_end - next_address);
12766                         next_max_below = MIN(next_max_below, curr_max_below);
12767                         next_max_below = MIN(next_max_below,
12768                                              next_address - next_entry->vme_start);
12769                 }
12770
12771                 /*
12772                  * "curr_max_{above,below}" allow us to keep track of the
12773                  * portion of the submap that is actually mapped at this level:
12774                  * the rest of that submap is irrelevant to us, since it's not
12775                  * mapped here.
12776                  * The relevant portion of the map starts at
12777                  * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
12778                  */
12779                 curr_max_above = MIN(curr_max_above,
12780                                      curr_entry->vme_end - curr_address);
12781                 curr_max_below = MIN(curr_max_below,
12782                                      curr_address - curr_entry->vme_start);
12783
12784                 if (!curr_entry->is_sub_map ||
12785                     curr_depth >= user_max_depth) {
12786                         /*
12787                          * We hit a leaf map or we reached the maximum depth
12788                          * we could, so stop looking.  Keep the current map
12789                          * locked.
12790                          */
12791                         break;
12792                 }
12793
12794                 /*
12795                  * Get down to the next submap level.
12796                  */
12797
12798                 /*
12799                  * Lock the next level and unlock the current level,
12800                  * unless we need to keep it locked to access the "next_entry"
12801                  * later.
12802                  */
12803                 if (not_in_kdp) {
12804                         vm_map_lock_read(VME_SUBMAP(curr_entry));
12805                 }
12806                 if (curr_map == next_map) {
12807                         /* keep "next_map" locked in case we need it */
12808                 } else {
12809                         /* release this map */
12810                         if (not_in_kdp)
12811                                 vm_map_unlock_read(curr_map);
12812                 }
12813
12814                 /*
12815                  * Adjust the offset.  "curr_entry" maps the submap
12816                  * at relative address "curr_entry->vme_start" in the
12817                  * curr_map but skips the first "VME_OFFSET(curr_entry)"
12818                  * bytes of the submap.
12819                  * "curr_offset" always represents the offset of a virtual
12820                  * address in the curr_map relative to the absolute address
12821                  * space (i.e. the top-level VM map).
12822                  */
12823                 curr_offset +=
12824                         (VME_OFFSET(curr_entry) - curr_entry->vme_start);
12825                 curr_address = user_address + curr_offset;
12826                 /* switch to the submap */
12827                 curr_map = VME_SUBMAP(curr_entry);
12828                 curr_depth++;
12829                 curr_entry = NULL;
12830         }
12831
12832         if (curr_entry == NULL) {
12833                 /* no VM region contains the address... */
12834 #if DEVELOPMENT || DEBUG
12835                 if (vm_region_footprint && /* we want footprint numbers */
12836                     look_for_pages && /* & we want page counts */
12837                     next_entry == NULL && /* & there are no more regions */
12838                     /* & we haven't already provided our fake region: */
12839                     user_address == vm_map_last_entry(map)->vme_end) {
12840                         ledger_amount_t nonvol, nonvol_compressed;
12841                         /*
12842                          * Add a fake memory region to account for
12843                          * purgeable memory that counts towards this
12844                          * task's memory footprint, i.e. the resident
12845                          * compressed pages of non-volatile objects
12846                          * owned by that task.
12847                          */
12848                         ledger_get_balance(
12849                                 map->pmap->ledger,
12850                                 task_ledgers.purgeable_nonvolatile,
12851                                 &nonvol);
12852                         ledger_get_balance(
12853                                 map->pmap->ledger,
12854                                 task_ledgers.purgeable_nonvolatile_compressed,
12855                                 &nonvol_compressed);
12856                         if (nonvol + nonvol_compressed == 0) {
12857                                 /* no purgeable memory usage to report */
12858                                 return KERN_FAILURE;
12859                         }
12860                         /* fake region to show nonvolatile footprint */
12861                         submap_info->protection = VM_PROT_DEFAULT;
12862                         submap_info->max_protection = VM_PROT_DEFAULT;
12863                         submap_info->inheritance = VM_INHERIT_DEFAULT;
12864                         submap_info->offset = 0;
12865                         submap_info->user_tag = 0;
12866                         submap_info->pages_resident = (unsigned int) (nonvol / PAGE_SIZE);
12867                         submap_info->pages_shared_now_private = 0;
12868                         submap_info->pages_swapped_out = (unsigned int) (nonvol_compressed / PAGE_SIZE);
12869                         submap_info->pages_dirtied = submap_info->pages_resident;
12870                         submap_info->ref_count = 1;
12871                         submap_info->shadow_depth = 0;
12872                         submap_info->external_pager = 0;
12873                         submap_info->share_mode = SM_PRIVATE;
12874                         submap_info->is_submap = 0;
12875                         submap_info->behavior = VM_BEHAVIOR_DEFAULT;
12876                         submap_info->object_id = 0x11111111;
12877                         submap_info->user_wired_count = 0;
12878                         submap_info->pages_reusable = 0;
12879                         *nesting_depth = 0;
12880                         *size = (vm_map_size_t) (nonvol + nonvol_compressed);
12881                         *address = user_address;
12882                         return KERN_SUCCESS;
12883                 }
12884 #endif /* DEVELOPMENT || DEBUG */
12885                 if (next_entry == NULL) {
12886                         /* ... and no VM region follows it either */
12887                         return KERN_INVALID_ADDRESS;
12888                 }
12889                 /* ... gather info about the next VM region */
12890                 curr_entry = next_entry;
12891                 curr_map = next_map;    /* still locked ... */
12892                 curr_address = next_address;
12893                 curr_skip = next_skip;
12894                 curr_offset = next_offset;
12895                 curr_depth = next_depth;
12896                 curr_max_above = next_max_above;
12897                 curr_max_below = next_max_below;
12898         } else {
12899                 /* we won't need "next_entry" after all */
12900                 if (next_entry != NULL) {
12901                         /* release "next_map" */
12902                         if (next_map != curr_map && not_in_kdp) {
12903                                 vm_map_unlock_read(next_map);
12904                         }
12905                 }
12906         }
12907         next_entry = NULL;
12908         next_map = NULL;
12909         next_offset = 0;
12910         next_skip = 0;
12911         next_depth = 0;
12912         next_max_below = -1;
12913         next_max_above = -1;
12914
12915         if (curr_entry->is_sub_map &&
12916             curr_depth < user_max_depth) {
12917                 /*
12918                  * We're not as deep as we could be:  we must have
12919                  * gone back up after not finding anything mapped
12920                  * below the original top-level map entry's.
12921                  * Let's move "curr_address" forward and recurse again.
12922                  */
12923                 user_address = curr_address;
12924                 goto recurse_again;
12925         }
12926
12927         *nesting_depth = curr_depth;
12928         *size = curr_max_above + curr_max_below;
12929         *address = user_address + curr_skip - curr_max_below;
12930
12931 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
12932 // so probably should be a real 32b ID vs. ptr.
12933 // Current users just check for equality
12934 #define INFO_MAKE_OBJECT_ID(p)  ((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p))
12935
12936         if (look_for_pages) {
12937                 submap_info->user_tag = VME_ALIAS(curr_entry);
12938                 submap_info->offset = VME_OFFSET(curr_entry);
12939                 submap_info->protection = curr_entry->protection;
12940                 submap_info->inheritance = curr_entry->inheritance;
12941                 submap_info->max_protection = curr_entry->max_protection;
12942                 submap_info->behavior = curr_entry->behavior;
12943                 submap_info->user_wired_count = curr_entry->user_wired_count;
12944                 submap_info->is_submap = curr_entry->is_sub_map;
12945                 submap_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
12946         } else {
12947                 short_info->user_tag = VME_ALIAS(curr_entry);
12948                 short_info->offset = VME_OFFSET(curr_entry);
12949                 short_info->protection = curr_entry->protection;
12950                 short_info->inheritance = curr_entry->inheritance;
12951                 short_info->max_protection = curr_entry->max_protection;
12952                 short_info->behavior = curr_entry->behavior;
12953                 short_info->user_wired_count = curr_entry->user_wired_count;
12954                 short_info->is_submap = curr_entry->is_sub_map;
12955                 short_info->object_id = INFO_MAKE_OBJECT_ID(VME_OBJECT(curr_entry));
12956         }
12957
12958         extended.pages_resident = 0;
12959         extended.pages_swapped_out = 0;
12960         extended.pages_shared_now_private = 0;
12961         extended.pages_dirtied = 0;
12962         extended.pages_reusable = 0;
12963         extended.external_pager = 0;
12964         extended.shadow_depth = 0;
12965         extended.share_mode = SM_EMPTY;
12966         extended.ref_count = 0;
12967
12968         if (not_in_kdp) {
12969                 if (!curr_entry->is_sub_map) {
12970                         vm_map_offset_t range_start, range_end;
12971                         range_start = MAX((curr_address - curr_max_below),
12972                                           curr_entry->vme_start);
12973                         range_end = MIN((curr_address + curr_max_above),
12974                                         curr_entry->vme_end);
12975                         vm_map_region_walk(curr_map,
12976                                            range_start,
12977                                            curr_entry,
12978                                            (VME_OFFSET(curr_entry) +
12979                                             (range_start -
12980                                              curr_entry->vme_start)),
12981                                            range_end - range_start,
12982                                            &extended,
12983                                            look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
12984                         if (extended.external_pager &&
12985                             extended.ref_count == 2 &&
12986                             extended.share_mode == SM_SHARED) {
12987                                 extended.share_mode = SM_PRIVATE;
12988                         }
12989                 } else {
12990                         if (curr_entry->use_pmap) {
12991                                 extended.share_mode = SM_TRUESHARED;
12992                         } else {
12993                                 extended.share_mode = SM_PRIVATE;
12994                         }
12995                         extended.ref_count = VME_SUBMAP(curr_entry)->ref_count;
12996                 }
12997         }
12998
12999         if (look_for_pages) {
13000                 submap_info->pages_resident = extended.pages_resident;
13001                 submap_info->pages_swapped_out = extended.pages_swapped_out;
13002                 submap_info->pages_shared_now_private =
13003                         extended.pages_shared_now_private;
13004                 submap_info->pages_dirtied = extended.pages_dirtied;
13005                 submap_info->external_pager = extended.external_pager;
13006                 submap_info->shadow_depth = extended.shadow_depth;
13007                 submap_info->share_mode = extended.share_mode;
13008                 submap_info->ref_count = extended.ref_count;
13009
13010                 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
13011                         submap_info->pages_reusable = extended.pages_reusable;
13012                 }
13013         } else {
13014                 short_info->external_pager = extended.external_pager;
13015                 short_info->shadow_depth = extended.shadow_depth;
13016                 short_info->share_mode = extended.share_mode;
13017                 short_info->ref_count = extended.ref_count;
13018         }
13019
13020         if (not_in_kdp) {
13021                 vm_map_unlock_read(curr_map);
13022         }
13023
13024         return KERN_SUCCESS;
13025 }
13026
13027 /*
13028  *      vm_region:
13029  *
13030  *      User call to obtain information about a region in
13031  *      a task's address map. Currently, only one flavor is
13032  *      supported.
13033  *
13034  *      XXX The reserved and behavior fields cannot be filled
13035  *          in until the vm merge from the IK is completed, and
13036  *          vm_reserve is implemented.
13037  */
13038
13039 kern_return_t
13040 vm_map_region(
13041         vm_map_t                 map,
13042         vm_map_offset_t *address,               /* IN/OUT */
13043         vm_map_size_t           *size,                  /* OUT */
13044         vm_region_flavor_t       flavor,                /* IN */
13045         vm_region_info_t         info,                  /* OUT */
13046         mach_msg_type_number_t  *count, /* IN/OUT */
13047         mach_port_t             *object_name)           /* OUT */
13048 {
13049         vm_map_entry_t          tmp_entry;
13050         vm_map_entry_t          entry;
13051         vm_map_offset_t         start;
13052
13053         if (map == VM_MAP_NULL)
13054                 return(KERN_INVALID_ARGUMENT);
13055
13056         switch (flavor) {
13057
13058         case VM_REGION_BASIC_INFO:
13059                 /* legacy for old 32-bit objects info */
13060         {
13061                 vm_region_basic_info_t  basic;
13062
13063                 if (*count < VM_REGION_BASIC_INFO_COUNT)
13064                         return(KERN_INVALID_ARGUMENT);
13065
13066                 basic = (vm_region_basic_info_t) info;
13067                 *count = VM_REGION_BASIC_INFO_COUNT;
13068
13069                 vm_map_lock_read(map);
13070
13071                 start = *address;
13072                 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13073                         if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13074                                 vm_map_unlock_read(map);
13075                                 return(KERN_INVALID_ADDRESS);
13076                         }
13077                 } else {
13078                         entry = tmp_entry;
13079                 }
13080
13081                 start = entry->vme_start;
13082
13083                 basic->offset = (uint32_t)VME_OFFSET(entry);
13084                 basic->protection = entry->protection;
13085                 basic->inheritance = entry->inheritance;
13086                 basic->max_protection = entry->max_protection;
13087                 basic->behavior = entry->behavior;
13088                 basic->user_wired_count = entry->user_wired_count;
13089                 basic->reserved = entry->is_sub_map;
13090                 *address = start;
13091                 *size = (entry->vme_end - start);
13092
13093                 if (object_name) *object_name = IP_NULL;
13094                 if (entry->is_sub_map) {
13095                         basic->shared = FALSE;
13096                 } else {
13097                         basic->shared = entry->is_shared;
13098                 }
13099
13100                 vm_map_unlock_read(map);
13101                 return(KERN_SUCCESS);
13102         }
13103
13104         case VM_REGION_BASIC_INFO_64:
13105         {
13106                 vm_region_basic_info_64_t       basic;
13107
13108                 if (*count < VM_REGION_BASIC_INFO_COUNT_64)
13109                         return(KERN_INVALID_ARGUMENT);
13110
13111                 basic = (vm_region_basic_info_64_t) info;
13112                 *count = VM_REGION_BASIC_INFO_COUNT_64;
13113
13114                 vm_map_lock_read(map);
13115
13116                 start = *address;
13117                 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13118                         if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13119                                 vm_map_unlock_read(map);
13120                                 return(KERN_INVALID_ADDRESS);
13121                         }
13122                 } else {
13123                         entry = tmp_entry;
13124                 }
13125
13126                 start = entry->vme_start;
13127
13128                 basic->offset = VME_OFFSET(entry);
13129                 basic->protection = entry->protection;
13130                 basic->inheritance = entry->inheritance;
13131                 basic->max_protection = entry->max_protection;
13132                 basic->behavior = entry->behavior;
13133                 basic->user_wired_count = entry->user_wired_count;
13134                 basic->reserved = entry->is_sub_map;
13135                 *address = start;
13136                 *size = (entry->vme_end - start);
13137
13138                 if (object_name) *object_name = IP_NULL;
13139                 if (entry->is_sub_map) {
13140                         basic->shared = FALSE;
13141                 } else {
13142                         basic->shared = entry->is_shared;
13143                 }
13144
13145                 vm_map_unlock_read(map);
13146                 return(KERN_SUCCESS);
13147         }
13148         case VM_REGION_EXTENDED_INFO:
13149                 if (*count < VM_REGION_EXTENDED_INFO_COUNT)
13150                         return(KERN_INVALID_ARGUMENT);
13151                 /*fallthru*/
13152         case VM_REGION_EXTENDED_INFO__legacy:
13153                 if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy)
13154                         return KERN_INVALID_ARGUMENT;
13155
13156         {
13157                 vm_region_extended_info_t       extended;
13158                 mach_msg_type_number_t original_count;
13159
13160                 extended = (vm_region_extended_info_t) info;
13161
13162                 vm_map_lock_read(map);
13163
13164                 start = *address;
13165                 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13166                         if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13167                                 vm_map_unlock_read(map);
13168                                 return(KERN_INVALID_ADDRESS);
13169                         }
13170                 } else {
13171                         entry = tmp_entry;
13172                 }
13173                 start = entry->vme_start;
13174
13175                 extended->protection = entry->protection;
13176                 extended->user_tag = VME_ALIAS(entry);
13177                 extended->pages_resident = 0;
13178                 extended->pages_swapped_out = 0;
13179                 extended->pages_shared_now_private = 0;
13180                 extended->pages_dirtied = 0;
13181                 extended->external_pager = 0;
13182                 extended->shadow_depth = 0;
13183
13184                 original_count = *count;
13185                 if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
13186                         *count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
13187                 } else {
13188                         extended->pages_reusable = 0;
13189                         *count = VM_REGION_EXTENDED_INFO_COUNT;
13190                 }
13191
13192                 vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);
13193
13194                 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED)
13195                         extended->share_mode = SM_PRIVATE;
13196
13197                 if (object_name)
13198                         *object_name = IP_NULL;
13199                 *address = start;
13200                 *size = (entry->vme_end - start);
13201
13202                 vm_map_unlock_read(map);
13203                 return(KERN_SUCCESS);
13204         }
13205         case VM_REGION_TOP_INFO:
13206         {
13207                 vm_region_top_info_t    top;
13208
13209                 if (*count < VM_REGION_TOP_INFO_COUNT)
13210                         return(KERN_INVALID_ARGUMENT);
13211
13212                 top = (vm_region_top_info_t) info;
13213                 *count = VM_REGION_TOP_INFO_COUNT;
13214
13215                 vm_map_lock_read(map);
13216
13217                 start = *address;
13218                 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
13219                         if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
13220                                 vm_map_unlock_read(map);
13221                                 return(KERN_INVALID_ADDRESS);
13222                         }
13223                 } else {
13224                         entry = tmp_entry;
13225
13226                 }
13227                 start = entry->vme_start;
13228
13229                 top->private_pages_resident = 0;
13230                 top->shared_pages_resident = 0;
13231
13232                 vm_map_region_top_walk(entry, top);
13233
13234                 if (object_name)
13235                         *object_name = IP_NULL;
13236                 *address = start;
13237                 *size = (entry->vme_end - start);
13238
13239                 vm_map_unlock_read(map);
13240                 return(KERN_SUCCESS);
13241         }
13242         default:
13243                 return(KERN_INVALID_ARGUMENT);
13244         }
13245 }
13246
/*
 * OBJ_RESIDENT_COUNT(obj, entry_size):
 *
 * Number of resident pages to report for "obj", capped at "entry_size".
 * When the whole object is marked "all_reusable", only its wired pages
 * are counted; otherwise the resident pages minus the reusable ones.
 *
 * Both arguments are expanded more than once, so they must be free of
 * side effects.
 */
#define OBJ_RESIDENT_COUNT(obj, entry_size)                             \
        MIN((entry_size),                                               \
            (!(obj)->all_reusable ?                                     \
             (obj)->resident_page_count - (obj)->reusable_page_count :  \
             (obj)->wired_page_count))
13252
13253 void
13254 vm_map_region_top_walk(
13255         vm_map_entry_t             entry,
13256         vm_region_top_info_t       top)
13257 {
13258
13259         if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
13260                 top->share_mode = SM_EMPTY;
13261                 top->ref_count = 0;
13262                 top->obj_id = 0;
13263                 return;
13264         }
13265
13266         {
13267                 struct  vm_object *obj, *tmp_obj;
13268                 int             ref_count;
13269                 uint32_t        entry_size;
13270
13271                 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
13272
13273                 obj = VME_OBJECT(entry);
13274
13275                 vm_object_lock(obj);
13276
13277                 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
13278                         ref_count--;
13279
13280                 assert(obj->reusable_page_count <= obj->resident_page_count);
13281                 if (obj->shadow) {
13282                         if (ref_count == 1)
13283                                 top->private_pages_resident =
13284                                         OBJ_RESIDENT_COUNT(obj, entry_size);
13285                         else
13286                                 top->shared_pages_resident =
13287                                         OBJ_RESIDENT_COUNT(obj, entry_size);
13288                         top->ref_count  = ref_count;
13289                         top->share_mode = SM_COW;
13290
13291                         while ((tmp_obj = obj->shadow)) {
13292                                 vm_object_lock(tmp_obj);
13293                                 vm_object_unlock(obj);
13294                                 obj = tmp_obj;
13295
13296                                 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
13297                                         ref_count--;
13298
13299                                 assert(obj->reusable_page_count <= obj->resident_page_count);
13300                                 top->shared_pages_resident +=
13301                                         OBJ_RESIDENT_COUNT(obj, entry_size);
13302                                 top->ref_count += ref_count - 1;
13303                         }
13304                 } else {
13305                         if (entry->superpage_size) {
13306                                 top->share_mode = SM_LARGE_PAGE;
13307                                 top->shared_pages_resident = 0;
13308                                 top->private_pages_resident = entry_size;
13309                         } else if (entry->needs_copy) {
13310                                 top->share_mode = SM_COW;
13311                                 top->shared_pages_resident =
13312                                         OBJ_RESIDENT_COUNT(obj, entry_size);
13313                         } else {
13314                                 if (ref_count == 1 ||
13315                                     (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) {
13316                                         top->share_mode = SM_PRIVATE;
13317                                                 top->private_pages_resident =
13318                                                         OBJ_RESIDENT_COUNT(obj,
13319                                                                            entry_size);
13320                                 } else {
13321                                         top->share_mode = SM_SHARED;
13322                                         top->shared_pages_resident =
13323                                                 OBJ_RESIDENT_COUNT(obj,
13324                                                                   entry_size);
13325                                 }
13326                         }
13327                         top->ref_count = ref_count;
13328                 }
13329                 /* XXX K64: obj_id will be truncated */
13330                 top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
13331
13332                 vm_object_unlock(obj);
13333         }
13334 }
13335
13336 void
13337 vm_map_region_walk(
13338         vm_map_t                        map,
13339         vm_map_offset_t                 va,
13340         vm_map_entry_t                  entry,
13341         vm_object_offset_t              offset,
13342         vm_object_size_t                range,
13343         vm_region_extended_info_t       extended,
13344         boolean_t                       look_for_pages,
13345         mach_msg_type_number_t count)
13346 {
13347         struct vm_object *obj, *tmp_obj;
13348         vm_map_offset_t       last_offset;
13349         int               i;
13350         int               ref_count;
13351         struct vm_object        *shadow_object;
13352         int                     shadow_depth;
13353
13354         if ((VME_OBJECT(entry) == 0) ||
13355             (entry->is_sub_map) ||
13356             (VME_OBJECT(entry)->phys_contiguous &&
13357              !entry->superpage_size)) {
13358                 extended->share_mode = SM_EMPTY;
13359                 extended->ref_count = 0;
13360                 return;
13361         }
13362
13363         if (entry->superpage_size) {
13364                 extended->shadow_depth = 0;
13365                 extended->share_mode = SM_LARGE_PAGE;
13366                 extended->ref_count = 1;
13367                 extended->external_pager = 0;
13368                 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
13369                 extended->shadow_depth = 0;
13370                 return;
13371         }
13372
13373         obj = VME_OBJECT(entry);
13374
13375         vm_object_lock(obj);
13376
13377         if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
13378                 ref_count--;
13379
13380         if (look_for_pages) {
13381                 for (last_offset = offset + range;
13382                      offset < last_offset;
13383                      offset += PAGE_SIZE_64, va += PAGE_SIZE) {
13384 #if DEVELOPMENT || DEBUG
13385                         if (vm_region_footprint) {
13386                                 if (obj->purgable != VM_PURGABLE_DENY) {
13387                                         /* alternate accounting */
13388                                 } else if (entry->iokit_acct) {
13389                                         /* alternate accounting */
13390                                         extended->pages_resident++;
13391                                         extended->pages_dirtied++;
13392                                 } else {
13393                                         int disp;
13394
13395                                         disp = 0;
13396                                         pmap_query_page_info(map->pmap, va, &disp);
13397                                         if (disp & PMAP_QUERY_PAGE_PRESENT) {
13398                                                 extended->pages_resident++;
13399                                                 if (disp & PMAP_QUERY_PAGE_REUSABLE) {
13400                                                         extended->pages_reusable++;
13401                                                 } else if (!(disp & PMAP_QUERY_PAGE_INTERNAL) ||
13402                                                            (disp & PMAP_QUERY_PAGE_ALTACCT)) {
13403                                                         /* alternate accounting */
13404                                                 } else {
13405                                                         extended->pages_dirtied++;
13406                                                 }
13407                                         } else if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
13408                                                 if (disp & PMAP_QUERY_PAGE_COMPRESSED_ALTACCT) {
13409                                                         /* alternate accounting */
13410                                                 } else {
13411                                                         extended->pages_swapped_out++;
13412                                                 }
13413                                         }
13414                                 }
13415                                 continue;
13416                         }
13417 #endif /* DEVELOPMENT || DEBUG */
13418                         vm_map_region_look_for_page(map, va, obj,
13419                                                     offset, ref_count,
13420                                                     0, extended, count);
13421                 }
13422 #if DEVELOPMENT || DEBUG
13423                 if (vm_region_footprint) {
13424                         goto collect_object_info;
13425                 }
13426 #endif /* DEVELOPMENT || DEBUG */
13427         } else {
13428 #if DEVELOPMENT || DEBUG
13429         collect_object_info:
13430 #endif /* DEVELOPMENT || DEBUG */
13431                 shadow_object = obj->shadow;
13432                 shadow_depth = 0;
13433
13434                 if ( !(obj->pager_trusted) && !(obj->internal))
13435                         extended->external_pager = 1;
13436
13437                 if (shadow_object != VM_OBJECT_NULL) {
13438                         vm_object_lock(shadow_object);
13439                         for (;
13440                              shadow_object != VM_OBJECT_NULL;
13441                              shadow_depth++) {
13442                                 vm_object_t     next_shadow;
13443
13444                                 if ( !(shadow_object->pager_trusted) &&
13445                                      !(shadow_object->internal))
13446                                         extended->external_pager = 1;
13447
13448                                 next_shadow = shadow_object->shadow;
13449                                 if (next_shadow) {
13450                                         vm_object_lock(next_shadow);
13451                                 }
13452                                 vm_object_unlock(shadow_object);
13453                                 shadow_object = next_shadow;
13454                         }
13455                 }
13456                 extended->shadow_depth = shadow_depth;
13457         }
13458
13459         if (extended->shadow_depth || entry->needs_copy)
13460                 extended->share_mode = SM_COW;
13461         else {
13462                 if (ref_count == 1)
13463                         extended->share_mode = SM_PRIVATE;
13464                 else {
13465                         if (obj->true_share)
13466                                 extended->share_mode = SM_TRUESHARED;
13467                         else
13468                                 extended->share_mode = SM_SHARED;
13469                 }
13470         }
13471         extended->ref_count = ref_count - extended->shadow_depth;
13472
13473         for (i = 0; i < extended->shadow_depth; i++) {
13474                 if ((tmp_obj = obj->shadow) == 0)
13475                         break;
13476                 vm_object_lock(tmp_obj);
13477                 vm_object_unlock(obj);
13478
13479                 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress)
13480                         ref_count--;
13481
13482                 extended->ref_count += ref_count;
13483                 obj = tmp_obj;
13484         }
13485         vm_object_unlock(obj);
13486
13487         if (extended->share_mode == SM_SHARED) {
13488                 vm_map_entry_t       cur;
13489                 vm_map_entry_t       last;
13490                 int      my_refs;
13491
13492                 obj = VME_OBJECT(entry);
13493                 last = vm_map_to_entry(map);
13494                 my_refs = 0;
13495
13496                 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress)
13497                         ref_count--;
13498                 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next)
13499                         my_refs += vm_map_region_count_obj_refs(cur, obj);
13500
13501                 if (my_refs == ref_count)
13502                         extended->share_mode = SM_PRIVATE_ALIASED;
13503                 else if (my_refs > 1)
13504                         extended->share_mode = SM_SHARED_ALIASED;
13505         }
13506 }
13507
13508
13509 /* object is locked on entry and locked on return */
13510
13511
13512 static void
13513 vm_map_region_look_for_page(
13514         __unused vm_map_t               map,
13515         __unused vm_map_offset_t        va,
13516         vm_object_t                     object,
13517         vm_object_offset_t              offset,
13518         int                             max_refcnt,
13519         int                             depth,
13520         vm_region_extended_info_t       extended,
13521         mach_msg_type_number_t count)
13522 {
13523         vm_page_t       p;
13524         vm_object_t     shadow;
13525         int             ref_count;
13526         vm_object_t     caller_object;
13527
13528         shadow = object->shadow;
13529         caller_object = object;
13530
13531
13532         while (TRUE) {
13533
13534                 if ( !(object->pager_trusted) && !(object->internal))
13535                         extended->external_pager = 1;
13536
13537                 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
13538                         if (shadow && (max_refcnt == 1))
13539                                 extended->pages_shared_now_private++;
13540
13541                         if (!p->fictitious &&
13542                             (p->dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p))))
13543                                 extended->pages_dirtied++;
13544                         else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
13545                                 if (p->reusable || object->all_reusable) {
13546                                         extended->pages_reusable++;
13547                                 }
13548                         }
13549
13550                         extended->pages_resident++;
13551
13552                         if(object != caller_object)
13553                                 vm_object_unlock(object);
13554
13555                         return;
13556                 }
13557                 if (object->internal &&
13558                     object->alive &&
13559                     !object->terminating &&
13560                     object->pager_ready) {
13561
13562                         if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
13563                             == VM_EXTERNAL_STATE_EXISTS) {
13564                                 /* the pager has that page */
13565                                 extended->pages_swapped_out++;
13566                                 if (object != caller_object)
13567                                         vm_object_unlock(object);
13568                                 return;
13569                         }
13570                 }
13571
13572                 if (shadow) {
13573                         vm_object_lock(shadow);
13574
13575                         if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress)
13576                                 ref_count--;
13577
13578                         if (++depth > extended->shadow_depth)
13579                                 extended->shadow_depth = depth;
13580
13581                         if (ref_count > max_refcnt)
13582                                 max_refcnt = ref_count;
13583
13584                         if(object != caller_object)
13585                                 vm_object_unlock(object);
13586
13587                         offset = offset + object->vo_shadow_offset;
13588                         object = shadow;
13589                         shadow = object->shadow;
13590                         continue;
13591                 }
13592                 if(object != caller_object)
13593                         vm_object_unlock(object);
13594                 break;
13595         }
13596 }
13597
13598 static int
13599 vm_map_region_count_obj_refs(
13600         vm_map_entry_t    entry,
13601         vm_object_t       object)
13602 {
13603         int ref_count;
13604         vm_object_t chk_obj;
13605         vm_object_t tmp_obj;
13606
13607         if (VME_OBJECT(entry) == 0)
13608                 return(0);
13609
13610         if (entry->is_sub_map)
13611                 return(0);
13612         else {
13613                 ref_count = 0;
13614
13615                 chk_obj = VME_OBJECT(entry);
13616                 vm_object_lock(chk_obj);
13617
13618                 while (chk_obj) {
13619                         if (chk_obj == object)
13620                                 ref_count++;
13621                         tmp_obj = chk_obj->shadow;
13622                         if (tmp_obj)
13623                                 vm_object_lock(tmp_obj);
13624                         vm_object_unlock(chk_obj);
13625
13626                         chk_obj = tmp_obj;
13627                 }
13628         }
13629         return(ref_count);
13630 }
13631
13632
13633 /*
13634  *      Routine:        vm_map_simplify
13635  *
13636  *      Description:
13637  *              Attempt to simplify the map representation in
13638  *              the vicinity of the given starting address.
13639  *      Note:
13640  *              This routine is intended primarily to keep the
13641  *              kernel maps more compact -- they generally don't
13642  *              benefit from the "expand a map entry" technology
13643  *              at allocation time because the adjacent entry
13644  *              is often wired down.
13645  */
13646 void
13647 vm_map_simplify_entry(
13648         vm_map_t        map,
13649         vm_map_entry_t  this_entry)
13650 {
13651         vm_map_entry_t  prev_entry;
13652
13653         counter(c_vm_map_simplify_entry_called++);
13654
13655         prev_entry = this_entry->vme_prev;
13656
13657         if ((this_entry != vm_map_to_entry(map)) &&
13658             (prev_entry != vm_map_to_entry(map)) &&
13659
13660             (prev_entry->vme_end == this_entry->vme_start) &&
13661
13662             (prev_entry->is_sub_map == this_entry->is_sub_map) &&
13663             (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
13664             ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
13665                                     prev_entry->vme_start))
13666              == VME_OFFSET(this_entry)) &&
13667
13668             (prev_entry->behavior == this_entry->behavior) &&
13669             (prev_entry->needs_copy == this_entry->needs_copy) &&
13670             (prev_entry->protection == this_entry->protection) &&
13671             (prev_entry->max_protection == this_entry->max_protection) &&
13672             (prev_entry->inheritance == this_entry->inheritance) &&
13673             (prev_entry->use_pmap == this_entry->use_pmap) &&
13674             (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
13675             (prev_entry->no_cache == this_entry->no_cache) &&
13676             (prev_entry->permanent == this_entry->permanent) &&
13677             (prev_entry->map_aligned == this_entry->map_aligned) &&
13678             (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
13679             (prev_entry->used_for_jit == this_entry->used_for_jit) &&
13680             /* from_reserved_zone: OK if that field doesn't match */
13681             (prev_entry->iokit_acct == this_entry->iokit_acct) &&
13682             (prev_entry->vme_resilient_codesign ==
13683              this_entry->vme_resilient_codesign) &&
13684             (prev_entry->vme_resilient_media ==
13685              this_entry->vme_resilient_media) &&
13686
13687             (prev_entry->wired_count == this_entry->wired_count) &&
13688             (prev_entry->user_wired_count == this_entry->user_wired_count) &&
13689
13690             ((prev_entry->vme_atomic == FALSE) && (this_entry->vme_atomic == FALSE)) &&
13691             (prev_entry->in_transition == FALSE) &&
13692             (this_entry->in_transition == FALSE) &&
13693             (prev_entry->needs_wakeup == FALSE) &&
13694             (this_entry->needs_wakeup == FALSE) &&
13695             (prev_entry->is_shared == FALSE) &&
13696             (this_entry->is_shared == FALSE) &&
13697             (prev_entry->superpage_size == FALSE) &&
13698             (this_entry->superpage_size == FALSE)
13699                 ) {
13700                 vm_map_store_entry_unlink(map, prev_entry);
13701                 assert(prev_entry->vme_start < this_entry->vme_end);
13702                 if (prev_entry->map_aligned)
13703                         assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
13704                                                    VM_MAP_PAGE_MASK(map)));
13705                 this_entry->vme_start = prev_entry->vme_start;
13706                 VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));
13707
13708                 if (map->holelistenabled) {
13709                         vm_map_store_update_first_free(map, this_entry, TRUE);
13710                 }
13711
13712                 if (prev_entry->is_sub_map) {
13713                         vm_map_deallocate(VME_SUBMAP(prev_entry));
13714                 } else {
13715                         vm_object_deallocate(VME_OBJECT(prev_entry));
13716                 }
13717                 vm_map_entry_dispose(map, prev_entry);
13718                 SAVE_HINT_MAP_WRITE(map, this_entry);
13719                 counter(c_vm_map_simplified++);
13720         }
13721 }
13722
13723 void
13724 vm_map_simplify(
13725         vm_map_t        map,
13726         vm_map_offset_t start)
13727 {
13728         vm_map_entry_t  this_entry;
13729
13730         vm_map_lock(map);
13731         if (vm_map_lookup_entry(map, start, &this_entry)) {
13732                 vm_map_simplify_entry(map, this_entry);
13733                 vm_map_simplify_entry(map, this_entry->vme_next);
13734         }
13735         counter(c_vm_map_simplify_called++);
13736         vm_map_unlock(map);
13737 }
13738
13739 static void
13740 vm_map_simplify_range(
13741         vm_map_t        map,
13742         vm_map_offset_t start,
13743         vm_map_offset_t end)
13744 {
13745         vm_map_entry_t  entry;
13746
13747         /*
13748          * The map should be locked (for "write") by the caller.
13749          */
13750
13751         if (start >= end) {
13752                 /* invalid address range */
13753                 return;
13754         }
13755
13756         start = vm_map_trunc_page(start,
13757                                   VM_MAP_PAGE_MASK(map));
13758         end = vm_map_round_page(end,
13759                                 VM_MAP_PAGE_MASK(map));
13760
13761         if (!vm_map_lookup_entry(map, start, &entry)) {
13762                 /* "start" is not mapped and "entry" ends before "start" */
13763                 if (entry == vm_map_to_entry(map)) {
13764                         /* start with first entry in the map */
13765                         entry = vm_map_first_entry(map);
13766                 } else {
13767                         /* start with next entry */
13768                         entry = entry->vme_next;
13769                 }
13770         }
13771
13772         while (entry != vm_map_to_entry(map) &&
13773                entry->vme_start <= end) {
13774                 /* try and coalesce "entry" with its previous entry */
13775                 vm_map_simplify_entry(map, entry);
13776                 entry = entry->vme_next;
13777         }
13778 }
13779
13780
13781 /*
13782  *      Routine:        vm_map_machine_attribute
13783  *      Purpose:
13784  *              Provide machine-specific attributes to mappings,
13785  *              such as cachability etc. for machines that provide
13786  *              them.  NUMA architectures and machines with big/strange
13787  *              caches will use this.
13788  *      Note:
13789  *              Responsibilities for locking and checking are handled here,
13790  *              everything else in the pmap module. If any non-volatile
13791  *              information must be kept, the pmap module should handle
13792  *              it itself. [This assumes that attributes do not
13793  *              need to be inherited, which seems ok to me]
13794  */
13795 kern_return_t
13796 vm_map_machine_attribute(
13797         vm_map_t                        map,
13798         vm_map_offset_t         start,
13799         vm_map_offset_t         end,
13800         vm_machine_attribute_t  attribute,
13801         vm_machine_attribute_val_t* value)              /* IN/OUT */
13802 {
13803         kern_return_t   ret;
13804         vm_map_size_t sync_size;
13805         vm_map_entry_t entry;
13806
13807         if (start < vm_map_min(map) || end > vm_map_max(map))
13808                 return KERN_INVALID_ADDRESS;
13809
13810         /* Figure how much memory we need to flush (in page increments) */
13811         sync_size = end - start;
13812
13813         vm_map_lock(map);
13814
13815         if (attribute != MATTR_CACHE) {
13816                 /* If we don't have to find physical addresses, we */
13817                 /* don't have to do an explicit traversal here.    */
13818                 ret = pmap_attribute(map->pmap, start, end-start,
13819                                      attribute, value);
13820                 vm_map_unlock(map);
13821                 return ret;
13822         }
13823
13824         ret = KERN_SUCCESS;                                                                             /* Assume it all worked */
13825
13826         while(sync_size) {
13827                 if (vm_map_lookup_entry(map, start, &entry)) {
13828                         vm_map_size_t   sub_size;
13829                         if((entry->vme_end - start) > sync_size) {
13830                                 sub_size = sync_size;
13831                                 sync_size = 0;
13832                         } else {
13833                                 sub_size = entry->vme_end - start;
13834                                 sync_size -= sub_size;
13835                         }
13836                         if(entry->is_sub_map) {
13837                                 vm_map_offset_t sub_start;
13838                                 vm_map_offset_t sub_end;
13839
13840                                 sub_start = (start - entry->vme_start)
13841                                         + VME_OFFSET(entry);
13842                                 sub_end = sub_start + sub_size;
13843                                 vm_map_machine_attribute(
13844                                         VME_SUBMAP(entry),
13845                                         sub_start,
13846                                         sub_end,
13847                                         attribute, value);
13848                         } else {
13849                                 if (VME_OBJECT(entry)) {
13850                                         vm_page_t               m;
13851                                         vm_object_t             object;
13852                                         vm_object_t             base_object;
13853                                         vm_object_t             last_object;
13854                                         vm_object_offset_t      offset;
13855                                         vm_object_offset_t      base_offset;
13856                                         vm_map_size_t           range;
13857                                         range = sub_size;
13858                                         offset = (start - entry->vme_start)
13859                                                 + VME_OFFSET(entry);
13860                                         base_offset = offset;
13861                                         object = VME_OBJECT(entry);
13862                                         base_object = object;
13863                                         last_object = NULL;
13864
13865                                         vm_object_lock(object);
13866
13867                                         while (range) {
13868                                                 m = vm_page_lookup(
13869                                                         object, offset);
13870
13871                                                 if (m && !m->fictitious) {
13872                                                         ret =
13873                                                                 pmap_attribute_cache_sync(
13874                                                                         VM_PAGE_GET_PHYS_PAGE(m),
13875                                                                         PAGE_SIZE,
13876                                                                         attribute, value);
13877
13878                                                 } else if (object->shadow) {
13879                                                         offset = offset + object->vo_shadow_offset;
13880                                                         last_object = object;
13881                                                         object = object->shadow;
13882                                                         vm_object_lock(last_object->shadow);
13883                                                         vm_object_unlock(last_object);
13884                                                         continue;
13885                                                 }
13886                                                 range -= PAGE_SIZE;
13887
13888                                                 if (base_object != object) {
13889                                                         vm_object_unlock(object);
13890                                                         vm_object_lock(base_object);
13891                                                         object = base_object;
13892                                                 }
13893                                                 /* Bump to the next page */
13894                                                 base_offset += PAGE_SIZE;
13895                                                 offset = base_offset;
13896                                         }
13897                                         vm_object_unlock(object);
13898                                 }
13899                         }
13900                         start += sub_size;
13901                 } else {
13902                         vm_map_unlock(map);
13903                         return KERN_FAILURE;
13904                 }
13905
13906         }
13907
13908         vm_map_unlock(map);
13909
13910         return ret;
13911 }
13912
13913 /*
13914  *      vm_map_behavior_set:
13915  *
13916  *      Sets the paging reference behavior of the specified address
13917  *      range in the target map.  Paging reference behavior affects
13918  *      how pagein operations resulting from faults on the map will be
13919  *      clustered.
13920  */
13921 kern_return_t
13922 vm_map_behavior_set(
13923         vm_map_t        map,
13924         vm_map_offset_t start,
13925         vm_map_offset_t end,
13926         vm_behavior_t   new_behavior)
13927 {
13928         vm_map_entry_t  entry;
13929         vm_map_entry_t  temp_entry;
13930
13931         XPR(XPR_VM_MAP,
13932             "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d",
13933             map, start, end, new_behavior, 0);
13934
13935         if (start > end ||
13936             start < vm_map_min(map) ||
13937             end > vm_map_max(map)) {
13938                 return KERN_NO_SPACE;
13939         }
13940
13941         switch (new_behavior) {
13942
13943         /*
13944          * This first block of behaviors all set a persistent state on the specified
13945          * memory range.  All we have to do here is to record the desired behavior
13946          * in the vm_map_entry_t's.
13947          */
13948
13949         case VM_BEHAVIOR_DEFAULT:
13950         case VM_BEHAVIOR_RANDOM:
13951         case VM_BEHAVIOR_SEQUENTIAL:
13952         case VM_BEHAVIOR_RSEQNTL:
13953         case VM_BEHAVIOR_ZERO_WIRED_PAGES:
13954                 vm_map_lock(map);
13955
13956                 /*
13957                  *      The entire address range must be valid for the map.
13958                  *      Note that vm_map_range_check() does a
13959                  *      vm_map_lookup_entry() internally and returns the
13960                  *      entry containing the start of the address range if
13961                  *      the entire range is valid.
13962                  */
13963                 if (vm_map_range_check(map, start, end, &temp_entry)) {
13964                         entry = temp_entry;
13965                         vm_map_clip_start(map, entry, start);
13966                 }
13967                 else {
13968                         vm_map_unlock(map);
13969                         return(KERN_INVALID_ADDRESS);
13970                 }
13971
13972                 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
13973                         vm_map_clip_end(map, entry, end);
13974                         if (entry->is_sub_map) {
13975                                 assert(!entry->use_pmap);
13976                         }
13977
13978                         if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) {
13979                                 entry->zero_wired_pages = TRUE;
13980                         } else {
13981                                 entry->behavior = new_behavior;
13982                         }
13983                         entry = entry->vme_next;
13984                 }
13985
13986                 vm_map_unlock(map);
13987                 break;
13988
13989         /*
13990          * The rest of these are different from the above in that they cause
13991          * an immediate action to take place as opposed to setting a behavior that
13992          * affects future actions.
13993          */
13994
13995         case VM_BEHAVIOR_WILLNEED:
13996                 return vm_map_willneed(map, start, end);
13997
13998         case VM_BEHAVIOR_DONTNEED:
13999                 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
14000
14001         case VM_BEHAVIOR_FREE:
14002                 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
14003
14004         case VM_BEHAVIOR_REUSABLE:
14005                 return vm_map_reusable_pages(map, start, end);
14006
14007         case VM_BEHAVIOR_REUSE:
14008                 return vm_map_reuse_pages(map, start, end);
14009
14010         case VM_BEHAVIOR_CAN_REUSE:
14011                 return vm_map_can_reuse(map, start, end);
14012
14013 #if MACH_ASSERT
14014         case VM_BEHAVIOR_PAGEOUT:
14015                 return vm_map_pageout(map, start, end);
14016 #endif /* MACH_ASSERT */
14017
14018         default:
14019                 return(KERN_INVALID_ARGUMENT);
14020         }
14021
14022         return(KERN_SUCCESS);
14023 }
14024
14025
14026 /*
14027  * Internals for madvise(MADV_WILLNEED) system call.
14028  *
14029  * The present implementation is to do a read-ahead if the mapping corresponds
14030  * to a mapped regular file.  If it's an anonymous mapping, then we do nothing
14031  * and basically ignore the "advice" (which we are always free to do).
14032  */
14033
14034
14035 static kern_return_t
14036 vm_map_willneed(
14037         vm_map_t        map,
14038         vm_map_offset_t start,
14039         vm_map_offset_t end
14040 )
14041 {
14042         vm_map_entry_t                  entry;
14043         vm_object_t                     object;
14044         memory_object_t                 pager;
14045         struct vm_object_fault_info     fault_info;
14046         kern_return_t                   kr;
14047         vm_object_size_t                len;
14048         vm_object_offset_t              offset;
14049
14050         /*
14051          * Fill in static values in fault_info.  Several fields get ignored by the code
14052          * we call, but we'll fill them in anyway since uninitialized fields are bad
14053          * when it comes to future backwards compatibility.
14054          */
14055
14056         fault_info.interruptible = THREAD_UNINT;                /* ignored value */
14057         fault_info.behavior      = VM_BEHAVIOR_SEQUENTIAL;
14058         fault_info.no_cache      = FALSE;                       /* ignored value */
14059         fault_info.stealth       = TRUE;
14060         fault_info.io_sync = FALSE;
14061         fault_info.cs_bypass = FALSE;
14062         fault_info.mark_zf_absent = FALSE;
14063         fault_info.batch_pmap_op = FALSE;
14064
14065         /*
14066          * The MADV_WILLNEED operation doesn't require any changes to the
14067          * vm_map_entry_t's, so the read lock is sufficient.
14068          */
14069
14070         vm_map_lock_read(map);
14071
14072         /*
14073          * The madvise semantics require that the address range be fully
14074          * allocated with no holes.  Otherwise, we're required to return
14075          * an error.
14076          */
14077
14078         if (! vm_map_range_check(map, start, end, &entry)) {
14079                 vm_map_unlock_read(map);
14080                 return KERN_INVALID_ADDRESS;
14081         }
14082
14083         /*
14084          * Examine each vm_map_entry_t in the range.
14085          */
14086         for (; entry != vm_map_to_entry(map) && start < end; ) {
14087
14088                 /*
14089                  * The first time through, the start address could be anywhere
14090                  * within the vm_map_entry we found.  So adjust the offset to
14091                  * correspond.  After that, the offset will always be zero to
14092                  * correspond to the beginning of the current vm_map_entry.
14093                  */
14094                 offset = (start - entry->vme_start) + VME_OFFSET(entry);
14095
14096                 /*
14097                  * Set the length so we don't go beyond the end of the
14098                  * map_entry or beyond the end of the range we were given.
14099                  * This range could span also multiple map entries all of which
14100                  * map different files, so make sure we only do the right amount
14101                  * of I/O for each object.  Note that it's possible for there
14102                  * to be multiple map entries all referring to the same object
14103                  * but with different page permissions, but it's not worth
14104                  * trying to optimize that case.
14105                  */
14106                 len = MIN(entry->vme_end - start, end - start);
14107
14108                 if ((vm_size_t) len != len) {
14109                         /* 32-bit overflow */
14110                         len = (vm_size_t) (0 - PAGE_SIZE);
14111                 }
14112                 fault_info.cluster_size = (vm_size_t) len;
14113                 fault_info.lo_offset    = offset;
14114                 fault_info.hi_offset    = offset + len;
14115                 fault_info.user_tag     = VME_ALIAS(entry);
14116                 fault_info.pmap_options = 0;
14117                 if (entry->iokit_acct ||
14118                     (!entry->is_sub_map && !entry->use_pmap)) {
14119                         fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
14120                 }
14121
14122                 /*
14123                  * If there's no read permission to this mapping, then just
14124                  * skip it.
14125                  */
14126                 if ((entry->protection & VM_PROT_READ) == 0) {
14127                         entry = entry->vme_next;
14128                         start = entry->vme_start;
14129                         continue;
14130                 }
14131
14132                 /*
14133                  * Find the file object backing this map entry.  If there is
14134                  * none, then we simply ignore the "will need" advice for this
14135                  * entry and go on to the next one.
14136                  */
14137                 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
14138                         entry = entry->vme_next;
14139                         start = entry->vme_start;
14140                         continue;
14141                 }
14142
14143                 /*
14144                  * The data_request() could take a long time, so let's
14145                  * release the map lock to avoid blocking other threads.
14146                  */
14147                 vm_map_unlock_read(map);
14148
14149                 vm_object_paging_begin(object);
14150                 pager = object->pager;
14151                 vm_object_unlock(object);
14152
14153                 /*
14154                  * Get the data from the object asynchronously.
14155                  *
14156                  * Note that memory_object_data_request() places limits on the
14157                  * amount of I/O it will do.  Regardless of the len we
14158                  * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
14159                  * silently truncates the len to that size.  This isn't
14160                  * necessarily bad since madvise shouldn't really be used to
14161                  * page in unlimited amounts of data.  Other Unix variants
14162                  * limit the willneed case as well.  If this turns out to be an
14163                  * issue for developers, then we can always adjust the policy
14164                  * here and still be backwards compatible since this is all
14165                  * just "advice".
14166                  */
14167                 kr = memory_object_data_request(
14168                         pager,
14169                         offset + object->paging_offset,
14170                         0,      /* ignored */
14171                         VM_PROT_READ,
14172                         (memory_object_fault_info_t)&fault_info);
14173
14174                 vm_object_lock(object);
14175                 vm_object_paging_end(object);
14176                 vm_object_unlock(object);
14177
14178                 /*
14179                  * If we couldn't do the I/O for some reason, just give up on
14180                  * the madvise.  We still return success to the user since
14181                  * madvise isn't supposed to fail when the advice can't be
14182                  * taken.
14183                  */
14184                 if (kr != KERN_SUCCESS) {
14185                         return KERN_SUCCESS;
14186                 }
14187
14188                 start += len;
14189                 if (start >= end) {
14190                         /* done */
14191                         return KERN_SUCCESS;
14192                 }
14193
14194                 /* look up next entry */
14195                 vm_map_lock_read(map);
14196                 if (! vm_map_lookup_entry(map, start, &entry)) {
14197                         /*
14198                          * There's a new hole in the address range.
14199                          */
14200                         vm_map_unlock_read(map);
14201                         return KERN_INVALID_ADDRESS;
14202                 }
14203         }
14204
14205         vm_map_unlock_read(map);
14206         return KERN_SUCCESS;
14207 }
14208
14209 static boolean_t
14210 vm_map_entry_is_reusable(
14211         vm_map_entry_t entry)
14212 {
14213         /* Only user map entries */
14214
14215         vm_object_t object;
14216
14217         if (entry->is_sub_map) {
14218                 return FALSE;
14219         }
14220
14221         switch (VME_ALIAS(entry)) {
14222         case VM_MEMORY_MALLOC:
14223         case VM_MEMORY_MALLOC_SMALL:
14224         case VM_MEMORY_MALLOC_LARGE:
14225         case VM_MEMORY_REALLOC:
14226         case VM_MEMORY_MALLOC_TINY:
14227         case VM_MEMORY_MALLOC_LARGE_REUSABLE:
14228         case VM_MEMORY_MALLOC_LARGE_REUSED:
14229                 /*
14230                  * This is a malloc() memory region: check if it's still
14231                  * in its original state and can be re-used for more
14232                  * malloc() allocations.
14233                  */
14234                 break;
14235         default:
14236                 /*
14237                  * Not a malloc() memory region: let the caller decide if
14238                  * it's re-usable.
14239                  */
14240                 return TRUE;
14241         }
14242
14243         if (entry->is_shared ||
14244             entry->is_sub_map ||
14245             entry->in_transition ||
14246             entry->protection != VM_PROT_DEFAULT ||
14247             entry->max_protection != VM_PROT_ALL ||
14248             entry->inheritance != VM_INHERIT_DEFAULT ||
14249             entry->no_cache ||
14250             entry->permanent ||
14251             entry->superpage_size != FALSE ||
14252             entry->zero_wired_pages ||
14253             entry->wired_count != 0 ||
14254             entry->user_wired_count != 0) {
14255                 return FALSE;
14256         }
14257
14258         object = VME_OBJECT(entry);
14259         if (object == VM_OBJECT_NULL) {
14260                 return TRUE;
14261         }
14262         if (
14263 #if 0
14264                 /*
14265                  * Let's proceed even if the VM object is potentially
14266                  * shared.
14267                  * We check for this later when processing the actual
14268                  * VM pages, so the contents will be safe if shared.
14269                  *
14270                  * But we can still mark this memory region as "reusable" to
14271                  * acknowledge that the caller did let us know that the memory
14272                  * could be re-used and should not be penalized for holding
14273                  * on to it.  This allows its "resident size" to not include
14274                  * the reusable range.
14275                  */
14276             object->ref_count == 1 &&
14277 #endif
14278             object->wired_page_count == 0 &&
14279             object->copy == VM_OBJECT_NULL &&
14280             object->shadow == VM_OBJECT_NULL &&
14281             object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
14282             object->internal &&
14283             !object->true_share &&
14284             object->wimg_bits == VM_WIMG_USE_DEFAULT &&
14285             !object->code_signed) {
14286                 return TRUE;
14287         }
14288         return FALSE;
14289
14290
14291 }
14292
14293 static kern_return_t
14294 vm_map_reuse_pages(
14295         vm_map_t        map,
14296         vm_map_offset_t start,
14297         vm_map_offset_t end)
14298 {
14299         vm_map_entry_t                  entry;
14300         vm_object_t                     object;
14301         vm_object_offset_t              start_offset, end_offset;
14302
14303         /*
14304          * The MADV_REUSE operation doesn't require any changes to the
14305          * vm_map_entry_t's, so the read lock is sufficient.
14306          */
14307
14308         vm_map_lock_read(map);
14309         assert(map->pmap != kernel_pmap);       /* protect alias access */
14310
14311         /*
14312          * The madvise semantics require that the address range be fully
14313          * allocated with no holes.  Otherwise, we're required to return
14314          * an error.
14315          */
14316
14317         if (!vm_map_range_check(map, start, end, &entry)) {
14318                 vm_map_unlock_read(map);
14319                 vm_page_stats_reusable.reuse_pages_failure++;
14320                 return KERN_INVALID_ADDRESS;
14321         }
14322
14323         /*
14324          * Examine each vm_map_entry_t in the range.
14325          */
14326         for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
14327              entry = entry->vme_next) {
14328                 /*
14329                  * Sanity check on the VM map entry.
14330                  */
14331                 if (! vm_map_entry_is_reusable(entry)) {
14332                         vm_map_unlock_read(map);
14333                         vm_page_stats_reusable.reuse_pages_failure++;
14334                         return KERN_INVALID_ADDRESS;
14335                 }
14336
14337                 /*
14338                  * The first time through, the start address could be anywhere
14339                  * within the vm_map_entry we found.  So adjust the offset to
14340                  * correspond.
14341                  */
14342                 if (entry->vme_start < start) {
14343                         start_offset = start - entry->vme_start;
14344                 } else {
14345                         start_offset = 0;
14346                 }
14347                 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
14348                 start_offset += VME_OFFSET(entry);
14349                 end_offset += VME_OFFSET(entry);
14350
14351                 assert(!entry->is_sub_map);
14352                 object = VME_OBJECT(entry);
14353                 if (object != VM_OBJECT_NULL) {
14354                         vm_object_lock(object);
14355                         vm_object_reuse_pages(object, start_offset, end_offset,
14356                                               TRUE);
14357                         vm_object_unlock(object);
14358                 }
14359
14360                 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
14361                         /*
14362                          * XXX
14363                          * We do not hold the VM map exclusively here.
14364                          * The "alias" field is not that critical, so it's
14365                          * safe to update it here, as long as it is the only
14366                          * one that can be modified while holding the VM map
14367                          * "shared".
14368                          */
14369                         VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
14370                 }
14371         }
14372
14373         vm_map_unlock_read(map);
14374         vm_page_stats_reusable.reuse_pages_success++;
14375         return KERN_SUCCESS;
14376 }
14377
14378
14379 static kern_return_t
14380 vm_map_reusable_pages(
14381         vm_map_t        map,
14382         vm_map_offset_t start,
14383         vm_map_offset_t end)
14384 {
14385         vm_map_entry_t                  entry;
14386         vm_object_t                     object;
14387         vm_object_offset_t              start_offset, end_offset;
14388         vm_map_offset_t                 pmap_offset;
14389
14390         /*
14391          * The MADV_REUSABLE operation doesn't require any changes to the
14392          * vm_map_entry_t's, so the read lock is sufficient.
14393          */
14394
14395         vm_map_lock_read(map);
14396         assert(map->pmap != kernel_pmap);       /* protect alias access */
14397
14398         /*
14399          * The madvise semantics require that the address range be fully
14400          * allocated with no holes.  Otherwise, we're required to return
14401          * an error.
14402          */
14403
14404         if (!vm_map_range_check(map, start, end, &entry)) {
14405                 vm_map_unlock_read(map);
14406                 vm_page_stats_reusable.reusable_pages_failure++;
14407                 return KERN_INVALID_ADDRESS;
14408         }
14409
14410         /*
14411          * Examine each vm_map_entry_t in the range.
14412          */
14413         for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
14414              entry = entry->vme_next) {
14415                 int kill_pages = 0;
14416
14417                 /*
14418                  * Sanity check on the VM map entry.
14419                  */
14420                 if (! vm_map_entry_is_reusable(entry)) {
14421                         vm_map_unlock_read(map);
14422                         vm_page_stats_reusable.reusable_pages_failure++;
14423                         return KERN_INVALID_ADDRESS;
14424                 }
14425
14426                 if (! (entry->protection & VM_PROT_WRITE) && !entry->used_for_jit) {
14427                         /* not writable: can't discard contents */
14428                         vm_map_unlock_read(map);
14429                         vm_page_stats_reusable.reusable_nonwritable++;
14430                         vm_page_stats_reusable.reusable_pages_failure++;
14431                         return KERN_PROTECTION_FAILURE;
14432                 }
14433
14434                 /*
14435                  * The first time through, the start address could be anywhere
14436                  * within the vm_map_entry we found.  So adjust the offset to
14437                  * correspond.
14438                  */
14439                 if (entry->vme_start < start) {
14440                         start_offset = start - entry->vme_start;
14441                         pmap_offset = start;
14442                 } else {
14443                         start_offset = 0;
14444                         pmap_offset = entry->vme_start;
14445                 }
14446                 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
14447                 start_offset += VME_OFFSET(entry);
14448                 end_offset += VME_OFFSET(entry);
14449
14450                 assert(!entry->is_sub_map);
14451                 object = VME_OBJECT(entry);
14452                 if (object == VM_OBJECT_NULL)
14453                         continue;
14454
14455
14456                 vm_object_lock(object);
14457                 if (((object->ref_count == 1) ||
14458                      (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC &&
14459                       object->copy == VM_OBJECT_NULL)) &&
14460                     object->shadow == VM_OBJECT_NULL &&
14461                     /*
14462                      * "iokit_acct" entries are billed for their virtual size
14463                      * (rather than for their resident pages only), so they
14464                      * wouldn't benefit from making pages reusable, and it
14465                      * would be hard to keep track of pages that are both
14466                      * "iokit_acct" and "reusable" in the pmap stats and
14467                      * ledgers.
14468                      */
14469                     !(entry->iokit_acct ||
14470                       (!entry->is_sub_map && !entry->use_pmap))) {
14471                         if (object->ref_count != 1) {
14472                                 vm_page_stats_reusable.reusable_shared++;
14473                         }
14474                         kill_pages = 1;
14475                 } else {
14476                         kill_pages = -1;
14477                 }
14478                 if (kill_pages != -1) {
14479                         vm_object_deactivate_pages(object,
14480                                                    start_offset,
14481                                                    end_offset - start_offset,
14482                                                    kill_pages,
14483                                                    TRUE /*reusable_pages*/,
14484                                                    map->pmap,
14485                                                    pmap_offset);
14486                 } else {
14487                         vm_page_stats_reusable.reusable_pages_shared++;
14488                 }
14489                 vm_object_unlock(object);
14490
14491                 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
14492                     VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
14493                         /*
14494                          * XXX
14495                          * We do not hold the VM map exclusively here.
14496                          * The "alias" field is not that critical, so it's
14497                          * safe to update it here, as long as it is the only
14498                          * one that can be modified while holding the VM map
14499                          * "shared".
14500                          */
14501                         VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
14502                 }
14503         }
14504
14505         vm_map_unlock_read(map);
14506         vm_page_stats_reusable.reusable_pages_success++;
14507         return KERN_SUCCESS;
14508 }
14509
14510
14511 static kern_return_t
14512 vm_map_can_reuse(
14513         vm_map_t        map,
14514         vm_map_offset_t start,
14515         vm_map_offset_t end)
14516 {
14517         vm_map_entry_t                  entry;
14518
14519         /*
14520          * The MADV_REUSABLE operation doesn't require any changes to the
14521          * vm_map_entry_t's, so the read lock is sufficient.
14522          */
14523
14524         vm_map_lock_read(map);
14525         assert(map->pmap != kernel_pmap);       /* protect alias access */
14526
14527         /*
14528          * The madvise semantics require that the address range be fully
14529          * allocated with no holes.  Otherwise, we're required to return
14530          * an error.
14531          */
14532
14533         if (!vm_map_range_check(map, start, end, &entry)) {
14534                 vm_map_unlock_read(map);
14535                 vm_page_stats_reusable.can_reuse_failure++;
14536                 return KERN_INVALID_ADDRESS;
14537         }
14538
14539         /*
14540          * Examine each vm_map_entry_t in the range.
14541          */
14542         for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
14543              entry = entry->vme_next) {
14544                 /*
14545                  * Sanity check on the VM map entry.
14546                  */
14547                 if (! vm_map_entry_is_reusable(entry)) {
14548                         vm_map_unlock_read(map);
14549                         vm_page_stats_reusable.can_reuse_failure++;
14550                         return KERN_INVALID_ADDRESS;
14551                 }
14552         }
14553
14554         vm_map_unlock_read(map);
14555         vm_page_stats_reusable.can_reuse_success++;
14556         return KERN_SUCCESS;
14557 }
14558
14559
14560 #if MACH_ASSERT
14561 static kern_return_t
14562 vm_map_pageout(
14563         vm_map_t        map,
14564         vm_map_offset_t start,
14565         vm_map_offset_t end)
14566 {
14567         vm_map_entry_t                  entry;
14568
14569         /*
14570          * The MADV_PAGEOUT operation doesn't require any changes to the
14571          * vm_map_entry_t's, so the read lock is sufficient.
14572          */
14573
14574         vm_map_lock_read(map);
14575
14576         /*
14577          * The madvise semantics require that the address range be fully
14578          * allocated with no holes.  Otherwise, we're required to return
14579          * an error.
14580          */
14581
14582         if (!vm_map_range_check(map, start, end, &entry)) {
14583                 vm_map_unlock_read(map);
14584                 return KERN_INVALID_ADDRESS;
14585         }
14586
14587         /*
14588          * Examine each vm_map_entry_t in the range.
14589          */
14590         for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
14591              entry = entry->vme_next) {
14592                 vm_object_t     object;
14593
14594                 /*
14595                  * Sanity check on the VM map entry.
14596                  */
14597                 if (entry->is_sub_map) {
14598                         vm_map_t submap;
14599                         vm_map_offset_t submap_start;
14600                         vm_map_offset_t submap_end;
14601                         vm_map_entry_t submap_entry;
14602
14603                         submap = VME_SUBMAP(entry);
14604                         submap_start = VME_OFFSET(entry);
14605                         submap_end = submap_start + (entry->vme_end -
14606                                                      entry->vme_start);
14607
14608                         vm_map_lock_read(submap);
14609
14610                         if (! vm_map_range_check(submap,
14611                                                  submap_start,
14612                                                  submap_end,
14613                                                  &submap_entry)) {
14614                                 vm_map_unlock_read(submap);
14615                                 vm_map_unlock_read(map);
14616                                 return KERN_INVALID_ADDRESS;
14617                         }
14618
14619                         object = VME_OBJECT(submap_entry);
14620                         if (submap_entry->is_sub_map ||
14621                             object == VM_OBJECT_NULL ||
14622                             !object->internal) {
14623                                 vm_map_unlock_read(submap);
14624                                 continue;
14625                         }
14626
14627                         vm_object_pageout(object);
14628
14629                         vm_map_unlock_read(submap);
14630                         submap = VM_MAP_NULL;
14631                         submap_entry = VM_MAP_ENTRY_NULL;
14632                         continue;
14633                 }
14634
14635                 object = VME_OBJECT(entry);
14636                 if (entry->is_sub_map ||
14637                     object == VM_OBJECT_NULL ||
14638                     !object->internal) {
14639                         continue;
14640                 }
14641
14642                 vm_object_pageout(object);
14643         }
14644
14645         vm_map_unlock_read(map);
14646         return KERN_SUCCESS;
14647 }
14648 #endif /* MACH_ASSERT */
14649
14650
14651 /*
14652  *      Routine:        vm_map_entry_insert
14653  *
14654  *      Descritpion:    This routine inserts a new vm_entry in a locked map.
14655  */
14656 vm_map_entry_t
14657 vm_map_entry_insert(
14658         vm_map_t                map,
14659         vm_map_entry_t          insp_entry,
14660         vm_map_offset_t         start,
14661         vm_map_offset_t         end,
14662         vm_object_t             object,
14663         vm_object_offset_t      offset,
14664         boolean_t               needs_copy,
14665         boolean_t               is_shared,
14666         boolean_t               in_transition,
14667         vm_prot_t               cur_protection,
14668         vm_prot_t               max_protection,
14669         vm_behavior_t           behavior,
14670         vm_inherit_t            inheritance,
14671         unsigned                wired_count,
14672         boolean_t               no_cache,
14673         boolean_t               permanent,
14674         unsigned int            superpage_size,
14675         boolean_t               clear_map_aligned,
14676         boolean_t               is_submap,
14677         boolean_t               used_for_jit,
14678         int                     alias)
14679 {
14680         vm_map_entry_t  new_entry;
14681
14682         assert(insp_entry != (vm_map_entry_t)0);
14683
14684         new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
14685
14686         if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
14687                 new_entry->map_aligned = TRUE;
14688         } else {
14689                 new_entry->map_aligned = FALSE;
14690         }
14691         if (clear_map_aligned &&
14692             (! VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
14693              ! VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
14694                 new_entry->map_aligned = FALSE;
14695         }
14696
14697         new_entry->vme_start = start;
14698         new_entry->vme_end = end;
14699         assert(page_aligned(new_entry->vme_start));
14700         assert(page_aligned(new_entry->vme_end));
14701         if (new_entry->map_aligned) {
14702                 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
14703                                            VM_MAP_PAGE_MASK(map)));
14704                 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
14705                                            VM_MAP_PAGE_MASK(map)));
14706         }
14707         assert(new_entry->vme_start < new_entry->vme_end);
14708
14709         VME_OBJECT_SET(new_entry, object);
14710         VME_OFFSET_SET(new_entry, offset);
14711         new_entry->is_shared = is_shared;
14712         new_entry->is_sub_map = is_submap;
14713         new_entry->needs_copy = needs_copy;
14714         new_entry->in_transition = in_transition;
14715         new_entry->needs_wakeup = FALSE;
14716         new_entry->inheritance = inheritance;
14717         new_entry->protection = cur_protection;
14718         new_entry->max_protection = max_protection;
14719         new_entry->behavior = behavior;
14720         new_entry->wired_count = wired_count;
14721         new_entry->user_wired_count = 0;
14722         if (is_submap) {
14723                 /*
14724                  * submap: "use_pmap" means "nested".
14725                  * default: false.
14726                  */
14727                 new_entry->use_pmap = FALSE;
14728         } else {
14729                 /*
14730                  * object: "use_pmap" means "use pmap accounting" for footprint.
14731                  * default: true.
14732                  */
14733                 new_entry->use_pmap = TRUE;
14734         }
14735         VME_ALIAS_SET(new_entry, alias);
14736         new_entry->zero_wired_pages = FALSE;
14737         new_entry->no_cache = no_cache;
14738         new_entry->permanent = permanent;
14739         if (superpage_size)
14740                 new_entry->superpage_size = TRUE;
14741         else
14742                 new_entry->superpage_size = FALSE;
14743         if (used_for_jit){
14744                 if (!(map->jit_entry_exists)){
14745                         new_entry->used_for_jit = TRUE;
14746                         map->jit_entry_exists = TRUE;
14747
14748                         /* Tell the pmap that it supports JIT. */
14749                         pmap_set_jit_entitled(map->pmap);
14750                 }
14751         } else {
14752                 new_entry->used_for_jit = FALSE;
14753         }
14754         new_entry->iokit_acct = FALSE;
14755         new_entry->vme_resilient_codesign = FALSE;
14756         new_entry->vme_resilient_media = FALSE;
14757         new_entry->vme_atomic = FALSE;
14758
14759         /*
14760          *      Insert the new entry into the list.
14761          */
14762
14763         vm_map_store_entry_link(map, insp_entry, new_entry);
14764         map->size += end - start;
14765
14766         /*
14767          *      Update the free space hint and the lookup hint.
14768          */
14769
14770         SAVE_HINT_MAP_WRITE(map, new_entry);
14771         return new_entry;
14772 }
14773
14774 /*
14775  *      Routine:        vm_map_remap_extract
14776  *
14777  *      Description:    This routine returns a vm_entry list from a map.
14778  */
14779 static kern_return_t
14780 vm_map_remap_extract(
14781         vm_map_t                map,
14782         vm_map_offset_t         addr,
14783         vm_map_size_t           size,
14784         boolean_t               copy,
14785         struct vm_map_header    *map_header,
14786         vm_prot_t               *cur_protection,
14787         vm_prot_t               *max_protection,
14788         /* What, no behavior? */
14789         vm_inherit_t            inheritance,
14790         boolean_t               pageable,
14791         boolean_t               same_map,
14792         vm_map_kernel_flags_t   vmk_flags)
14793 {
14794         kern_return_t           result;
14795         vm_map_size_t           mapped_size;
14796         vm_map_size_t           tmp_size;
14797         vm_map_entry_t          src_entry;     /* result of last map lookup */
14798         vm_map_entry_t          new_entry;
14799         vm_object_offset_t      offset;
14800         vm_map_offset_t         map_address;
14801         vm_map_offset_t         src_start;     /* start of entry to map */
14802         vm_map_offset_t         src_end;       /* end of region to be mapped */
14803         vm_object_t             object;
14804         vm_map_version_t        version;
14805         boolean_t               src_needs_copy;
14806         boolean_t               new_entry_needs_copy;
14807
14808         assert(map != VM_MAP_NULL);
14809         assert(size != 0);
14810         assert(size == vm_map_round_page(size, PAGE_MASK));
14811         assert(inheritance == VM_INHERIT_NONE ||
14812                inheritance == VM_INHERIT_COPY ||
14813                inheritance == VM_INHERIT_SHARE);
14814
14815         /*
14816          *      Compute start and end of region.
14817          */
14818         src_start = vm_map_trunc_page(addr, PAGE_MASK);
14819         src_end = vm_map_round_page(src_start + size, PAGE_MASK);
14820
14821
14822         /*
14823          *      Initialize map_header.
14824          */
14825         map_header->links.next = (struct vm_map_entry *)&map_header->links;
14826         map_header->links.prev = (struct vm_map_entry *)&map_header->links;
14827         map_header->nentries = 0;
14828         map_header->entries_pageable = pageable;
14829         map_header->page_shift = PAGE_SHIFT;
14830
14831         vm_map_store_init( map_header );
14832
14833         *cur_protection = VM_PROT_ALL;
14834         *max_protection = VM_PROT_ALL;
14835
14836         map_address = 0;
14837         mapped_size = 0;
14838         result = KERN_SUCCESS;
14839
14840         /*
14841          *      The specified source virtual space might correspond to
14842          *      multiple map entries, need to loop on them.
14843          */
14844         vm_map_lock(map);
14845         while (mapped_size != size) {
14846                 vm_map_size_t   entry_size;
14847
14848                 /*
14849                  *      Find the beginning of the region.
14850                  */
14851                 if (! vm_map_lookup_entry(map, src_start, &src_entry)) {
14852                         result = KERN_INVALID_ADDRESS;
14853                         break;
14854                 }
14855
14856                 if (src_start < src_entry->vme_start ||
14857                     (mapped_size && src_start != src_entry->vme_start)) {
14858                         result = KERN_INVALID_ADDRESS;
14859                         break;
14860                 }
14861
14862                 tmp_size = size - mapped_size;
14863                 if (src_end > src_entry->vme_end)
14864                         tmp_size -= (src_end - src_entry->vme_end);
14865
14866                 entry_size = (vm_map_size_t)(src_entry->vme_end -
14867                                              src_entry->vme_start);
14868
14869                 if(src_entry->is_sub_map) {
14870                         vm_map_reference(VME_SUBMAP(src_entry));
14871                         object = VM_OBJECT_NULL;
14872                 } else {
14873                         object = VME_OBJECT(src_entry);
14874                         if (src_entry->iokit_acct) {
14875                                 /*
14876                                  * This entry uses "IOKit accounting".
14877                                  */
14878                         } else if (object != VM_OBJECT_NULL &&
14879                                    object->purgable != VM_PURGABLE_DENY) {
14880                                 /*
14881                                  * Purgeable objects have their own accounting:
14882                                  * no pmap accounting for them.
14883                                  */
14884                                 assert(!src_entry->use_pmap);
14885                         } else {
14886                                 /*
14887                                  * Not IOKit or purgeable:
14888                                  * must be accounted by pmap stats.
14889                                  */
14890                                 assert(src_entry->use_pmap);
14891                         }
14892
14893                         if (object == VM_OBJECT_NULL) {
14894                                 object = vm_object_allocate(entry_size);
14895                                 VME_OFFSET_SET(src_entry, 0);
14896                                 VME_OBJECT_SET(src_entry, object);
14897                         } else if (object->copy_strategy !=
14898                                    MEMORY_OBJECT_COPY_SYMMETRIC) {
14899                                 /*
14900                                  *      We are already using an asymmetric
14901                                  *      copy, and therefore we already have
14902                                  *      the right object.
14903                                  */
14904                                 assert(!src_entry->needs_copy);
14905                         } else if (src_entry->needs_copy || object->shadowed ||
14906                                    (object->internal && !object->true_share &&
14907                                     !src_entry->is_shared &&
14908                                     object->vo_size > entry_size)) {
14909
14910                                 VME_OBJECT_SHADOW(src_entry, entry_size);
14911
14912                                 if (!src_entry->needs_copy &&
14913                                     (src_entry->protection & VM_PROT_WRITE)) {
14914                                         vm_prot_t prot;
14915
14916                                         assert(!pmap_has_prot_policy(src_entry->protection));
14917
14918                                         prot = src_entry->protection & ~VM_PROT_WRITE;
14919
14920                                         if (override_nx(map,
14921                                                         VME_ALIAS(src_entry))
14922                                             && prot)
14923                                                 prot |= VM_PROT_EXECUTE;
14924
14925                                         assert(!pmap_has_prot_policy(prot));
14926
14927                                         if(map->mapped_in_other_pmaps) {
14928                                                 vm_object_pmap_protect(
14929                                                         VME_OBJECT(src_entry),
14930                                                         VME_OFFSET(src_entry),
14931                                                         entry_size,
14932                                                         PMAP_NULL,
14933                                                         src_entry->vme_start,
14934                                                         prot);
14935                                         } else {
14936                                                 pmap_protect(vm_map_pmap(map),
14937                                                              src_entry->vme_start,
14938                                                              src_entry->vme_end,
14939                                                              prot);
14940                                         }
14941                                 }
14942
14943                                 object = VME_OBJECT(src_entry);
14944                                 src_entry->needs_copy = FALSE;
14945                         }
14946
14947
14948                         vm_object_lock(object);
14949                         vm_object_reference_locked(object); /* object ref. for new entry */
14950                         if (object->copy_strategy ==
14951                             MEMORY_OBJECT_COPY_SYMMETRIC) {
14952                                 object->copy_strategy =
14953                                         MEMORY_OBJECT_COPY_DELAY;
14954                         }
14955                         vm_object_unlock(object);
14956                 }
14957
14958                 offset = (VME_OFFSET(src_entry) +
14959                           (src_start - src_entry->vme_start));
14960
14961                 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
14962                 vm_map_entry_copy(new_entry, src_entry);
14963                 if (new_entry->is_sub_map) {
14964                         /* clr address space specifics */
14965                         new_entry->use_pmap = FALSE;
14966                 }
14967
14968                 new_entry->map_aligned = FALSE;
14969
14970                 new_entry->vme_start = map_address;
14971                 new_entry->vme_end = map_address + tmp_size;
14972                 assert(new_entry->vme_start < new_entry->vme_end);
14973                 if (copy && vmk_flags.vmkf_remap_prot_copy) {
14974                         /*
14975                          * Remapping for vm_map_protect(VM_PROT_COPY)
14976                          * to convert a read-only mapping into a
14977                          * copy-on-write version of itself but
14978                          * with write access:
14979                          * keep the original inheritance and add
14980                          * VM_PROT_WRITE to the max protection.
14981                          */
14982                         new_entry->inheritance = src_entry->inheritance;
14983                         new_entry->max_protection |= VM_PROT_WRITE;
14984                 } else {
14985                         new_entry->inheritance = inheritance;
14986                 }
14987                 VME_OFFSET_SET(new_entry, offset);
14988
14989                 /*
14990                  * The new region has to be copied now if required.
14991                  */
14992         RestartCopy:
14993                 if (!copy) {
14994                         /*
14995                          * Cannot allow an entry describing a JIT
14996                          * region to be shared across address spaces.
14997                          */
14998                         if (src_entry->used_for_jit == TRUE && !same_map) {
14999                                 result = KERN_INVALID_ARGUMENT;
15000                                 break;
15001                         }
15002                         src_entry->is_shared = TRUE;
15003                         new_entry->is_shared = TRUE;
15004                         if (!(new_entry->is_sub_map))
15005                                 new_entry->needs_copy = FALSE;
15006
15007                 } else if (src_entry->is_sub_map) {
15008                         /* make this a COW sub_map if not already */
15009                         assert(new_entry->wired_count == 0);
15010                         new_entry->needs_copy = TRUE;
15011                         object = VM_OBJECT_NULL;
15012                 } else if (src_entry->wired_count == 0 &&
15013                            vm_object_copy_quickly(&VME_OBJECT(new_entry),
15014                                                   VME_OFFSET(new_entry),
15015                                                   (new_entry->vme_end -
15016                                                    new_entry->vme_start),
15017                                                   &src_needs_copy,
15018                                                   &new_entry_needs_copy)) {
15019
15020                         new_entry->needs_copy = new_entry_needs_copy;
15021                         new_entry->is_shared = FALSE;
15022
15023                         /*
15024                          * Handle copy_on_write semantics.
15025                          */
15026                         if (src_needs_copy && !src_entry->needs_copy) {
15027                                 vm_prot_t prot;
15028
15029                                 assert(!pmap_has_prot_policy(src_entry->protection));
15030
15031                                 prot = src_entry->protection & ~VM_PROT_WRITE;
15032
15033                                 if (override_nx(map,
15034                                                 VME_ALIAS(src_entry))
15035                                     && prot)
15036                                         prot |= VM_PROT_EXECUTE;
15037
15038                                 assert(!pmap_has_prot_policy(prot));
15039
15040                                 vm_object_pmap_protect(object,
15041                                                        offset,
15042                                                        entry_size,
15043                                                        ((src_entry->is_shared
15044                                                          || map->mapped_in_other_pmaps) ?
15045                                                         PMAP_NULL : map->pmap),
15046                                                        src_entry->vme_start,
15047                                                        prot);
15048
15049                                 assert(src_entry->wired_count == 0);
15050                                 src_entry->needs_copy = TRUE;
15051                         }
15052                         /*
15053                          * Throw away the old object reference of the new entry.
15054                          */
15055                         vm_object_deallocate(object);
15056
15057                 } else {
15058                         new_entry->is_shared = FALSE;
15059
15060                         /*
15061                          * The map can be safely unlocked since we
15062                          * already hold a reference on the object.
15063                          *
15064                          * Record the timestamp of the map for later
15065                          * verification, and unlock the map.
15066                          */
15067                         version.main_timestamp = map->timestamp;
15068                         vm_map_unlock(map);     /* Increments timestamp once! */
15069
15070                         /*
15071                          * Perform the copy.
15072                          */
15073                         if (src_entry->wired_count > 0) {
15074                                 vm_object_lock(object);
15075                                 result = vm_object_copy_slowly(
15076                                         object,
15077                                         offset,
15078                                         (new_entry->vme_end -
15079                                         new_entry->vme_start),
15080                                         THREAD_UNINT,
15081                                         &VME_OBJECT(new_entry));
15082
15083                                 VME_OFFSET_SET(new_entry, 0);
15084                                 new_entry->needs_copy = FALSE;
15085                         } else {
15086                                 vm_object_offset_t new_offset;
15087
15088                                 new_offset = VME_OFFSET(new_entry);
15089                                 result = vm_object_copy_strategically(
15090                                         object,
15091                                         offset,
15092                                         (new_entry->vme_end -
15093                                         new_entry->vme_start),
15094                                         &VME_OBJECT(new_entry),
15095                                         &new_offset,
15096                                         &new_entry_needs_copy);
15097                                 if (new_offset != VME_OFFSET(new_entry)) {
15098                                         VME_OFFSET_SET(new_entry, new_offset);
15099                                 }
15100
15101                                 new_entry->needs_copy = new_entry_needs_copy;
15102                         }
15103
15104                         /*
15105                          * Throw away the old object reference of the new entry.
15106                          */
15107                         vm_object_deallocate(object);
15108
15109                         if (result != KERN_SUCCESS &&
15110                             result != KERN_MEMORY_RESTART_COPY) {
15111                                 _vm_map_entry_dispose(map_header, new_entry);
15112                                 vm_map_lock(map);
15113                                 break;
15114                         }
15115
15116                         /*
15117                          * Verify that the map has not substantially
15118                          * changed while the copy was being made.
15119                          */
15120
15121                         vm_map_lock(map);
15122                         if (version.main_timestamp + 1 != map->timestamp) {
15123                                 /*
15124                                  * Simple version comparison failed.
15125                                  *
15126                                  * Retry the lookup and verify that the
15127                                  * same object/offset are still present.
15128                                  */
15129                                 vm_object_deallocate(VME_OBJECT(new_entry));
15130                                 _vm_map_entry_dispose(map_header, new_entry);
15131                                 if (result == KERN_MEMORY_RESTART_COPY)
15132                                         result = KERN_SUCCESS;
15133                                 continue;
15134                         }
15135
15136                         if (result == KERN_MEMORY_RESTART_COPY) {
15137                                 vm_object_reference(object);
15138                                 goto RestartCopy;
15139                         }
15140                 }
15141
15142                 _vm_map_store_entry_link(map_header,
15143                                    map_header->links.prev, new_entry);
15144
15145                 /*Protections for submap mapping are irrelevant here*/
15146                 if( !src_entry->is_sub_map ) {
15147                         *cur_protection &= src_entry->protection;
15148                         *max_protection &= src_entry->max_protection;
15149                 }
15150                 map_address += tmp_size;
15151                 mapped_size += tmp_size;
15152                 src_start += tmp_size;
15153
15154         } /* end while */
15155
15156         vm_map_unlock(map);
15157         if (result != KERN_SUCCESS) {
15158                 /*
15159                  * Free all allocated elements.
15160                  */
15161                 for (src_entry = map_header->links.next;
15162                      src_entry != (struct vm_map_entry *)&map_header->links;
15163                      src_entry = new_entry) {
15164                         new_entry = src_entry->vme_next;
15165                         _vm_map_store_entry_unlink(map_header, src_entry);
15166                         if (src_entry->is_sub_map) {
15167                                 vm_map_deallocate(VME_SUBMAP(src_entry));
15168                         } else {
15169                                 vm_object_deallocate(VME_OBJECT(src_entry));
15170                         }
15171                         _vm_map_entry_dispose(map_header, src_entry);
15172                 }
15173         }
15174         return result;
15175 }
15176
15177 /*
15178  *      Routine:        vm_map_remap
15179  *
15180  *                      Map portion of a task's address space.
15181  *                      Mapped region must not overlap more than
15182  *                      one vm memory object. Protections and
15183  *                      inheritance attributes remain the same
15184  *                      as in the original task and are out parameters.
15185  *                      Source and Target task can be identical
15186  *                      Other attributes are identical as for vm_map()
       *
       *      Parameters:
       *              target_map      map receiving the new mapping
       *              address         IN/OUT: requested address, truncated to the
       *                              target map's page mask before allocation;
       *                              on return, the address of the new mapping
       *                              (plus the offset of memory_address within
       *                              its page for VM_FLAGS_RETURN_DATA_ADDR)
       *              size            length to remap, rounded up with
       *                              vm_map_round_page(..., PAGE_MASK)
       *              mask            alignment mask handed to
       *                              vm_map_remap_range_allocate()
       *              flags           VM_FLAGS_*; RETURN_DATA_ADDR and
       *                              RESILIENT_CODESIGN are handled here, the
       *                              whole word is also passed to
       *                              vm_map_remap_range_allocate()
       *              vmk_flags       passed to both helpers
       *              tag             passed to vm_map_remap_range_allocate()
       *              src_map         map the range is extracted from
       *              memory_address  start of the range in src_map
       *              copy            passed to vm_map_remap_extract()
       *              cur_protection  OUT
       *              max_protection  OUT: both are passed to
       *                              vm_map_remap_extract() and then forced to
       *                              VM_PROT_READ for VM_FLAGS_RESILIENT_CODESIGN
       *              inheritance     must be VM_INHERIT_NONE, _COPY or _SHARE
       *
       *      Returns:
       *              KERN_INVALID_ARGUMENT for a null target or source map,
       *              an unknown inheritance, or a size that is (or rounds
       *              to) zero; otherwise the first failure reported by
       *              vm_map_remap_extract(), vm_map_remap_range_allocate()
       *              or vm_map_wire_kernel(), or KERN_SUCCESS.
15187  */
15188 kern_return_t
15189 vm_map_remap(
15190         vm_map_t                target_map,
15191         vm_map_address_t        *address,
15192         vm_map_size_t           size,
15193         vm_map_offset_t         mask,
15194         int                     flags,
15195         vm_map_kernel_flags_t   vmk_flags,
15196         vm_tag_t                tag,
15197         vm_map_t                src_map,
15198         vm_map_offset_t         memory_address,
15199         boolean_t               copy,
15200         vm_prot_t               *cur_protection,
15201         vm_prot_t               *max_protection,
15202         vm_inherit_t            inheritance)
15203 {
15204         kern_return_t           result;
15205         vm_map_entry_t          entry;
15206         vm_map_entry_t          insp_entry = VM_MAP_ENTRY_NULL;
15207         vm_map_entry_t          new_entry;
15208         struct vm_map_header    map_header;
15209         vm_map_offset_t         offset_in_mapping = 0;
15210
15211         if (target_map == VM_MAP_NULL)
15212                 return KERN_INVALID_ARGUMENT;
15213
15214         switch (inheritance) {
15215         case VM_INHERIT_NONE:
15216         case VM_INHERIT_COPY:
15217         case VM_INHERIT_SHARE:
15218                 if (size != 0 && src_map != VM_MAP_NULL)
15219                         break;
15220                 /*FALL THRU*/
15221         default:
15222                 return KERN_INVALID_ARGUMENT;
15223         }
15224
15225         /*
15226          * If the user is requesting that we return the address of the
15227          * first byte of the data (rather than the base of the page),
15228          * then we use different rounding semantics: specifically,
15229          * we assume that (memory_address, size) describes a region
15230          * all of whose pages we must cover, rather than a base to be truncated
15231          * down and a size to be added to that base.  So we figure out
15232          * the highest page that the requested region includes and make
15233          * sure that the size will cover it.
15234          *
15235          * The key example we're worried about it is of the form:
15236          *
15237          *              memory_address = 0x1ff0, size = 0x20
15238          *
15239          * With the old semantics, we round down the memory_address to 0x1000
15240          * and round up the size to 0x1000, resulting in our covering *only*
15241          * page 0x1000.  With the new semantics, we'd realize that the region covers
15242          * 0x1ff0-0x2010, and compute a size of 0x2000.  Thus, we cover both page
15243          * 0x1000 and page 0x2000 in the region we remap.
15244          */
15245         if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
15246                 offset_in_mapping = memory_address - vm_map_trunc_page(memory_address, PAGE_MASK);
15247                 size = vm_map_round_page(memory_address + size - vm_map_trunc_page(memory_address, PAGE_MASK), PAGE_MASK);
15248         } else {
15249                 size = vm_map_round_page(size, PAGE_MASK);
15250         }
15251         if (size == 0) {
15252                 return KERN_INVALID_ARGUMENT;
15253         }
15254
              /*
               * Build the list of entries describing the source range in
               * the private "map_header"; nothing is linked into
               * target_map yet.
               */
15255         result = vm_map_remap_extract(src_map, memory_address,
15256                                       size, copy, &map_header,
15257                                       cur_protection,
15258                                       max_protection,
15259                                       inheritance,
15260                                       target_map->hdr.entries_pageable,
15261                                       src_map == target_map,
15262                                       vmk_flags);
15263
15264         if (result != KERN_SUCCESS) {
15265                 return result;
15266         }
15267
15268         /*
15269          * Allocate/check a range of free virtual address
15270          * space for the target
15271          */
15272         *address = vm_map_trunc_page(*address,
15273                                      VM_MAP_PAGE_MASK(target_map));
15274         vm_map_lock(target_map);
15275         result = vm_map_remap_range_allocate(target_map, address, size,
15276                                              mask, flags, vmk_flags, tag,
15277                                              &insp_entry);
15278
              /*
               * Empty the private header either way: on success each entry
               * is shifted by *address and linked into target_map after
               * insp_entry; on failure its object/submap reference is
               * dropped and the entry is disposed of.
               */
15279         for (entry = map_header.links.next;
15280              entry != (struct vm_map_entry *)&map_header.links;
15281              entry = new_entry) {
15282                 new_entry = entry->vme_next;
15283                 _vm_map_store_entry_unlink(&map_header, entry);
15284                 if (result == KERN_SUCCESS) {
15285                         if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
15286                                 /* no codesigning -> read-only access */
15287                                 assert(!entry->used_for_jit);
15288                                 entry->max_protection = VM_PROT_READ;
15289                                 entry->protection = VM_PROT_READ;
15290                                 entry->vme_resilient_codesign = TRUE;
15291                         }
15292                         entry->vme_start += *address;
15293                         entry->vme_end += *address;
15294                         assert(!entry->map_aligned);
15295                         vm_map_store_entry_link(target_map, insp_entry, entry);
15296                         insp_entry = entry;
15297                 } else {
15298                         if (!entry->is_sub_map) {
15299                                 vm_object_deallocate(VME_OBJECT(entry));
15300                         } else {
15301                                 vm_map_deallocate(VME_SUBMAP(entry));
15302                         }
15303                         _vm_map_entry_dispose(&map_header, entry);
15304                 }
15305         }
15306
15307         if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
15308                 *cur_protection = VM_PROT_READ;
15309                 *max_protection = VM_PROT_READ;
15310         }
15311
15312         if (result == KERN_SUCCESS) {
15313                 /* insp_entry is still VM_MAP_ENTRY_NULL if allocation failed */
15314                 if (target_map->disable_vmentry_reuse == TRUE) {
15315                         assert(!target_map->is_nested_map);
15316                         if (target_map->highest_entry_end < insp_entry->vme_end) {
15317                                 target_map->highest_entry_end = insp_entry->vme_end;
15318                         }
15319                 }
15320                 target_map->size += size;
15321                 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
15322
15323         }
15324         vm_map_unlock(target_map);
15325
15326         if (result == KERN_SUCCESS && target_map->wiring_required)
15327                 result = vm_map_wire_kernel(target_map, *address,
15328                                      *address + size, *cur_protection, VM_KERN_MEMORY_MLOCK,
15329                                      TRUE);
15330
15331         /*
15332          * If requested, return the address of the data pointed to by the
15333          * request, rather than the base of the resulting page.
15334          */
15335         if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
15336                 *address += offset_in_mapping;
15337         }
15338
15339         return result;
15340 }
15341
15342 /*
15343  *      Routine:        vm_map_remap_range_allocate
15344  *
15345  *      Description:
15346  *              Allocate a range in the specified virtual address map.
15347  *              returns the address and the map entry just before the allocated
15348  *              range
15349  *
15350  *      Map must be locked.
       *
       *      With VM_FLAGS_ANYWHERE the map is searched, starting at *address
       *      (or at a random address for VM_FLAGS_RANDOM_ADDR), for a free
       *      range of "size" bytes whose start satisfies "mask"; the address
       *      found is stored in *address.  Without VM_FLAGS_ANYWHERE the
       *      range [*address, *address + size) itself is checked, after first
       *      deleting whatever is mapped there when VM_FLAGS_OVERWRITE is set.
       *
       *      The map lock is dropped and retaken if the search has to wait
       *      for space (map->wait_for_space), after which the search restarts.
       *
       *      Returns KERN_SUCCESS with *map_entry set, or KERN_NO_SPACE,
       *      KERN_INVALID_ADDRESS, KERN_RESOURCE_SHORTAGE or the error from
       *      vm_map_random_address_for_size().  *map_entry is not written on
       *      any failure path.
15351  */
15352
15353 static kern_return_t
15354 vm_map_remap_range_allocate(
15355         vm_map_t                map,
15356         vm_map_address_t        *address,       /* IN/OUT */
15357         vm_map_size_t           size,
15358         vm_map_offset_t         mask,
15359         int                     flags,
15360         __unused vm_map_kernel_flags_t  vmk_flags,
15361         __unused vm_tag_t       tag,
15362         vm_map_entry_t          *map_entry)     /* OUT */
15363 {
15364         vm_map_entry_t  entry;
15365         vm_map_offset_t start;
15366         vm_map_offset_t end;
15367         kern_return_t   kr;
15368         vm_map_entry_t          hole_entry;
15369
      /* Re-entered after sleeping for space; the search starts over from *address. */
15370 StartAgain: ;
15371
15372         start = *address;
15373
15374         if (flags & VM_FLAGS_ANYWHERE)
15375         {
15376                 if (flags & VM_FLAGS_RANDOM_ADDR)
15377                 {
15378                         /*
15379                          * Get a random start address.
15380                          */
15381                         kr = vm_map_random_address_for_size(map, address, size);
15382                         if (kr != KERN_SUCCESS) {
15383                                 return(kr);
15384                         }
15385                         start = *address;
15386                 }
15387
15388                 /*
15389                  *      Calculate the first possible address.
15390                  */
15391
15392                 if (start < map->min_offset)
15393                         start = map->min_offset;
15394                 if (start > map->max_offset)
15395                         return(KERN_NO_SPACE);
15396
15397                 /*
15398                  *      Look for the first possible address;
15399                  *      if there's already something at this
15400                  *      address, we have to start after it.
15401                  */
15402
15403                 if( map->disable_vmentry_reuse == TRUE) {
15404                         VM_MAP_HIGHEST_ENTRY(map, entry, start);
15405                 } else {
15406
15407                         if (map->holelistenabled) {
                                      /*
                                       * Walk the circular holes list for the
                                       * first element that starts at or after
                                       * "start", or that ends beyond it.
                                       */
15408                                 hole_entry = (vm_map_entry_t)map->holes_list;
15409
15410                                 if (hole_entry == NULL) {
15411                                         /*
15412                                          * No more space in the map?
15413                                          */
15414                                         return(KERN_NO_SPACE);
15415                                 } else {
15416
15417                                         boolean_t found_hole = FALSE;
15418
15419                                         do {
15420                                                 if (hole_entry->vme_start >= start) {
15421                                                         start = hole_entry->vme_start;
15422                                                         found_hole = TRUE;
15423                                                         break;
15424                                                 }
15425
15426                                                 if (hole_entry->vme_end > start) {
15427                                                         found_hole = TRUE;
15428                                                         break;
15429                                                 }
15430                                                 hole_entry = hole_entry->vme_next;
15431
15432                                         } while (hole_entry != (vm_map_entry_t) map->holes_list);
15433
15434                                         if (found_hole == FALSE) {
15435                                                 return (KERN_NO_SPACE);
15436                                         }
15437
15438                                         entry = hole_entry;
15439                                 }
15440                         } else {
15441                                 assert(first_free_is_valid(map));
15442                                 if (start == map->min_offset) {
15443                                         if ((entry = map->first_free) != vm_map_to_entry(map))
15444                                                 start = entry->vme_end;
15445                                 } else {
15446                                         vm_map_entry_t  tmp_entry;
15447                                         if (vm_map_lookup_entry(map, start, &tmp_entry))
15448                                                 start = tmp_entry->vme_end;
15449                                         entry = tmp_entry;
15450                                 }
15451                         }
15452                         start = vm_map_round_page(start,
15453                                                   VM_MAP_PAGE_MASK(map));
15454                 }
15455
15456                 /*
15457                  *      In any case, the "entry" always precedes
15458                  *      the proposed new region throughout the
15459                  *      loop:
15460                  */
15461
15462                 while (TRUE) {
15463                         vm_map_entry_t  next;
15464
15465                         /*
15466                          *      Find the end of the proposed new region.
15467                          *      Be sure we didn't go beyond the end, or
15468                          *      wrap around the address.
15469                          */
15470
15471                         end = ((start + mask) & ~mask);
15472                         end = vm_map_round_page(end,
15473                                                 VM_MAP_PAGE_MASK(map));
15474                         if (end < start)
15475                                 return(KERN_NO_SPACE);
15476                         start = end;
15477                         end += size;
15478
15479                         if ((end > map->max_offset) || (end < start)) {
15480                                 if (map->wait_for_space) {
15481                                         if (size <= (map->max_offset -
15482                                                      map->min_offset)) {
                                                      /*
                                                       * The request could fit in
                                                       * this map: sleep on the map
                                                       * with the lock dropped, then
                                                       * retake it and start over.
                                                       */
15483                                                 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
15484                                                 vm_map_unlock(map);
15485                                                 thread_block(THREAD_CONTINUE_NULL);
15486                                                 vm_map_lock(map);
15487                                                 goto StartAgain;
15488                                         }
15489                                 }
15490
15491                                 return(KERN_NO_SPACE);
15492                         }
15493
15494                         next = entry->vme_next;
15495
15496                         if (map->holelistenabled) {
                                      /*
                                       * "entry" came from the holes list, so the
                                       * candidate fits if this element extends to
                                       * at least "end".
                                       */
15497                                 if (entry->vme_end >= end)
15498                                         break;
15499                         } else {
15500                                 /*
15501                                  *      If there are no more entries, we must win.
15502                                  *
15503                                  *      OR
15504                                  *
15505                                  *      If there is another entry, it must be
15506                                  *      after the end of the potential new region.
15507                                  */
15508
15509                                 if (next == vm_map_to_entry(map))
15510                                         break;
15511
15512                                 if (next->vme_start >= end)
15513                                         break;
15514                         }
15515
15516                         /*
15517                          *      Didn't fit -- move to the next entry.
15518                          */
15519
15520                         entry = next;
15521
15522                         if (map->holelistenabled) {
15523                                 if (entry == (vm_map_entry_t) map->holes_list) {
15524                                         /*
15525                                          * Wrapped around
15526                                          */
15527                                         return(KERN_NO_SPACE);
15528                                 }
15529                                 start = entry->vme_start;
15530                         } else {
15531                                 start = entry->vme_end;
15532                         }
15533                 }
15534
15535                 if (map->holelistenabled) {
15536
                              /*
                               * Replace the holes-list element with whatever
                               * vm_map_lookup_entry() stores for the hole's start
                               * address; finding a real entry there is fatal.
                               * NOTE(review): presumably the stored entry is the
                               * map entry preceding the hole -- confirm against
                               * vm_map_lookup_entry().
                               */
15537                         if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
15538                                 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
15539                         }
15540                 }
15541
15542                 *address = start;
15543
15544         } else {
15545                 vm_map_entry_t          temp_entry;
15546
15547                 /*
15548                  *      Verify that:
15549                  *              the address doesn't itself violate
15550                  *              the mask requirement.
15551                  */
15552
15553                 if ((start & mask) != 0)
15554                         return(KERN_NO_SPACE);
15555
15556
15557                 /*
15558                  *      ...     the address is within bounds
15559                  */
15560
15561                 end = start + size;
15562
15563                 if ((start < map->min_offset) ||
15564                     (end > map->max_offset) ||
15565                     (start >= end)) {
15566                         return(KERN_INVALID_ADDRESS);
15567                 }
15568
15569                 /*
15570                  * If we're asked to overwrite whatever was mapped in that
15571                  * range, first deallocate that range.
15572                  */
15573                 if (flags & VM_FLAGS_OVERWRITE) {
15574                         vm_map_t zap_map;
15575
15576                         /*
15577                          * We use a "zap_map" to avoid having to unlock
15578                          * the "map" in vm_map_delete(), which would compromise
15579                          * the atomicity of the "deallocate" and then "remap"
15580                          * combination.
15581                          */
15582                         zap_map = vm_map_create(PMAP_NULL,
15583                                                 start,
15584                                                 end,
15585                                                 map->hdr.entries_pageable);
15586                         if (zap_map == VM_MAP_NULL) {
15587                                 return KERN_RESOURCE_SHORTAGE;
15588                         }
15589                         vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
15590                         vm_map_disable_hole_optimization(zap_map);
15591
15592                         kr = vm_map_delete(map, start, end,
15593                                            (VM_MAP_REMOVE_SAVE_ENTRIES |
15594                                             VM_MAP_REMOVE_NO_MAP_ALIGN),
15595                                            zap_map);
                              /*
                               * A failed delete is not returned directly; the
                               * checks below run either way.
                               * NOTE(review): zap_map is destroyed only when
                               * vm_map_delete() succeeds; on failure nothing in
                               * this function destroys it or hands it back --
                               * possible leak, confirm.
                               */
15596                         if (kr == KERN_SUCCESS) {
15597                                 vm_map_destroy(zap_map,
15598                                                VM_MAP_REMOVE_NO_PMAP_CLEANUP);
15599                                 zap_map = VM_MAP_NULL;
15600                         }
15601                 }
15602
15603                 /*
15604                  *      ...     the starting address isn't allocated
15605                  */
15606
15607                 if (vm_map_lookup_entry(map, start, &temp_entry))
15608                         return(KERN_NO_SPACE);
15609
15610                 entry = temp_entry;
15611
15612                 /*
15613                  *      ...     the next region doesn't overlap the
15614                  *              end point.
15615                  */
15616
15617                 if ((entry->vme_next != vm_map_to_entry(map)) &&
15618                     (entry->vme_next->vme_start < end))
15619                         return(KERN_NO_SPACE);
15620         }
15621         *map_entry = entry;
15622         return(KERN_SUCCESS);
15623 }
15624
15625 /*
15626  *      vm_map_switch:
15627  *
15628  *      Make "map" the address map of the calling thread and return
15629  *      the map that thread->map named before the switch.
15630  */
15631 vm_map_t
15632 vm_map_switch(
15633         vm_map_t        map)
15634 {
15635         thread_t        self = current_thread();
15636         vm_map_t        prev_map = self->map;
15637         int             this_cpu;
15638
15639         /*
15640          *      Preemption stays off from the cpu_number() sample until
15641          *      PMAP_SWITCH_USER() has swapped the maps (presumably so the
15642          *      thread cannot move to another CPU in between).
15643          */
15644         mp_disable_preemption();
15645         this_cpu = cpu_number();
15646         PMAP_SWITCH_USER(self, map, this_cpu);
15647         mp_enable_preemption();
15648         return prev_map;
15649 }
15650
15651
15652 /*
15653  *      Routine:        vm_map_write_user
15654  *
15655  *      Description:
15656  *              Copy "size" bytes from the kernel buffer "src_p" out to
15657  *              address "dst_addr" of the destination map with copyout().
15658  *              The space must already exist in the destination map.
15659  *              NOTE:  This routine should only be called by threads
15660  *              which can block on a page fault. i.e. kernel mode user
15661  *              threads.
15662  *
15663  *      Returns:
15664  *              KERN_SUCCESS, or KERN_INVALID_ADDRESS if copyout() fails.
15665  */
15666 kern_return_t
15667 vm_map_write_user(
15668         vm_map_t                map,
15669         void                    *src_p,
15670         vm_map_address_t        dst_addr,
15671         vm_size_t               size)
15672 {
15673         boolean_t       foreign_map = (current_map() != map);
15674         vm_map_t        saved_map = VM_MAP_NULL;
15675         kern_return_t   result = KERN_SUCCESS;
15676
15677         if (foreign_map) {
15678                 /* take on the identity of the target map for the copy */
15679                 vm_map_reference(map);
15680                 saved_map = vm_map_switch(map);
15681         }
15682
15683         if (copyout(src_p, dst_addr, size))
15684                 result = KERN_INVALID_ADDRESS;
15685
15686         if (foreign_map) {
15687                 /* back to the previous map, then drop our reference */
15688                 vm_map_switch(saved_map);
15689                 vm_map_deallocate(map);
15690         }
15691         return result;
15692 }
15693
15694 /*
15695  *      Routine:        vm_map_read_user
15696  *
15697  *      Description:
15698  *              Copy "size" bytes from address "src_addr" of the source
15699  *              map into the kernel buffer "dst_p" with copyin().
15700  *              The space must already exist in the kernel map.
15701  *              NOTE:  This routine should only be called by threads
15702  *              which can block on a page fault. i.e. kernel mode user
15703  *              threads.
15704  *
15705  *      Returns:
15706  *              KERN_SUCCESS, or KERN_INVALID_ADDRESS if copyin() fails.
15707  */
15708 kern_return_t
15709 vm_map_read_user(
15710         vm_map_t                map,
15711         vm_map_address_t        src_addr,
15712         void                    *dst_p,
15713         vm_size_t               size)
15714 {
15715         boolean_t       foreign_map = (current_map() != map);
15716         vm_map_t        saved_map = VM_MAP_NULL;
15717         kern_return_t   result = KERN_SUCCESS;
15718
15719         if (foreign_map) {
15720                 /* take on the identity of the target map for the copy */
15721                 vm_map_reference(map);
15722                 saved_map = vm_map_switch(map);
15723         }
15724
15725         if (copyin(src_addr, dst_p, size))
15726                 result = KERN_INVALID_ADDRESS;
15727
15728         if (foreign_map) {
15729                 /* back to the previous map, then drop our reference */
15730                 vm_map_switch(saved_map);
15731                 vm_map_deallocate(map);
15732         }
15733         return result;
15734 }
15735
15736
15737 /*
15738  *      vm_map_check_protection:
15739  *
15740  *      Report whether the target map allows the specified
15741  *      privilege on the entire address region [start, end).
15742  *
15743  *      Returns TRUE only when the region lies within the map's
15744  *      bounds, "start" falls inside an entry, the entries that
15745  *      follow cover the region without a hole, and each of them
15746  *      grants every bit of "protection".  Returns FALSE otherwise.
15747  *      If start == end, only the bounds and the presence of an
15748  *      entry at "start" are checked.
15749  *
15750  *      The map lock is held while the entries are examined and is
15751  *      dropped again before returning.
15752  */
15753 boolean_t
15754 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
15755                         vm_map_offset_t end, vm_prot_t protection)
15756 {
15757         vm_map_entry_t  cur;
15758         boolean_t       allowed = FALSE;
15759
15760         vm_map_lock(map);
15761
15762         /*
15763          * The range has to be well-formed and lie inside the map.
15764          */
15765         if (start < vm_map_min(map) ||
15766             end > vm_map_max(map) ||
15767             start > end)
15768                 goto done;
15769
15770         /*
15771          * "start" itself must fall inside an existing entry.
15772          */
15773         if (!vm_map_lookup_entry(map, start, &cur))
15774                 goto done;
15775
15776         /*
15777          * Step through successive entries, moving "start" to the end
15778          * of each entry that passes, until "end" has been reached.
15779          */
15780         for (; start < end; start = cur->vme_end, cur = cur->vme_next) {
15781                 /* reached vm_map_to_entry(map): no entries are left */
15782                 if (cur == vm_map_to_entry(map))
15783                         goto done;
15784
15785                 /* no holes allowed */
15786                 if (start < cur->vme_start)
15787                         goto done;
15788
15789                 /* the entry must grant every requested bit */
15790                 if ((cur->protection & protection) != protection)
15791                         goto done;
15792         }
15793         allowed = TRUE;
15794 done:
15795         vm_map_unlock(map);
15796         return allowed;
15797 }
15798
      /*
       *      vm_map_purgable_control:
       *
       *      Apply the purgeable operation "control" to the VM object
       *      mapped at "address" in "map".  "state" is validated for the
       *      two SET_STATE controls and is handed on to
       *      vm_object_purgable_control() for every control that gets
       *      that far.
       *
       *      VM_PURGABLE_PURGE_ALL uses neither "address" nor "state"; it
       *      only calls vm_purgeable_object_purge_all().
       *
       *      Returns:
       *              KERN_INVALID_ARGUMENT   null map, unknown control, bad
       *                                      state bits, or no purgeable
       *                                      object behind the entry
       *              KERN_INVALID_ADDRESS    no entry at "address", or the
       *                                      entry is a submap
       *              KERN_PROTECTION_FAILURE the entry is not writable
       *              otherwise the result of vm_object_purgable_control()
       */
15799 kern_return_t
15800 vm_map_purgable_control(
15801         vm_map_t                map,
15802         vm_map_offset_t         address,
15803         vm_purgable_t           control,
15804         int                     *state)
15805 {
15806         vm_map_entry_t          entry;
15807         vm_object_t             object = VM_OBJECT_NULL;
15808         kern_return_t           kr;
15809         boolean_t               was_nonvolatile;
15810         boolean_t               sets_state;
15811
15812         /*
15813          * Argument checks that need no locks.
15814          */
15815         if (map == VM_MAP_NULL)
15816                 return KERN_INVALID_ARGUMENT;
15817
15818         sets_state = (control == VM_PURGABLE_SET_STATE ||
15819                       control == VM_PURGABLE_SET_STATE_FROM_KERNEL);
15820
15821         if (!sets_state &&
15822             control != VM_PURGABLE_GET_STATE &&
15823             control != VM_PURGABLE_PURGE_ALL)
15824                 return KERN_INVALID_ARGUMENT;
15825
15826         if (control == VM_PURGABLE_PURGE_ALL) {
15827                 vm_purgeable_object_purge_all();
15828                 return KERN_SUCCESS;
15829         }
15830
15831         if (sets_state) {
15832                 /* only bits covered by VM_PURGABLE_ALL_MASKS may be set */
15833                 if ((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0)
15834                         return KERN_INVALID_ARGUMENT;
15835                 if ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK)
15836                         return KERN_INVALID_ARGUMENT;
15837         }
15838
15839         /*
15840          * Find the entry and vet it under the map read lock.
15841          */
15842         vm_map_lock_read(map);
15843
15844         if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
15845                 /* must pass a valid non-submap address */
15846                 kr = KERN_INVALID_ADDRESS;
15847         } else if ((entry->protection & VM_PROT_WRITE) == 0) {
15848                 /* can't apply purgable controls to something you can't write */
15849                 kr = KERN_PROTECTION_FAILURE;
15850         } else {
15851                 /* the object must already be present and be purgeable */
15852                 object = VME_OBJECT(entry);
15853                 if (object == VM_OBJECT_NULL ||
15854                     object->purgable == VM_PURGABLE_DENY)
15855                         kr = KERN_INVALID_ARGUMENT;
15856                 else
15857                         kr = KERN_SUCCESS;
15858         }
15859         if (kr != KERN_SUCCESS) {
15860                 vm_map_unlock_read(map);
15861                 return kr;
15862         }
15863
15864         /* the object lock is taken before the map lock is dropped */
15865         vm_object_lock(object);
15866
15867 #if 00
15868         if (VME_OFFSET(entry) != 0 ||
15869             entry->vme_end - entry->vme_start != object->vo_size) {
15870                 /*
15871                  * Can only apply purgable controls to the whole (existing)
15872                  * object at once.
15873                  */
15874                 vm_map_unlock_read(map);
15875                 vm_object_unlock(object);
15876                 return KERN_INVALID_ARGUMENT;
15877         }
15878 #endif
15879
15880         assert(!entry->is_sub_map);
15881         assert(!entry->use_pmap); /* purgeable has its own accounting */
15882
15883         vm_map_unlock_read(map);
15884
15885         was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);
15886         kr = vm_object_purgable_control(object, control, state);
15887
15888         if (was_nonvolatile &&
15889             object->purgable != VM_PURGABLE_NONVOLATILE &&
15890             map->pmap == kernel_pmap) {
15891 #if DEBUG
15892                 object->vo_purgeable_volatilizer = kernel_task;
15893 #endif /* DEBUG */
15894         }
15895
15896         vm_object_unlock(object);
15897
15898         return kr;
15899 }
15900
      /*
       *      vm_map_page_query_internal:
       *
       *      Query the VM_PAGE_INFO_BASIC record for "offset" in
       *      "target_map" through vm_map_page_info() and return its
       *      disposition and reference count via "disposition" and
       *      "ref_count".  Both outputs are set to 0 when the query fails.
       *
       *      Returns whatever vm_map_page_info() returned.
       */
15901 kern_return_t
15902 vm_map_page_query_internal(
15903         vm_map_t        target_map,
15904         vm_map_offset_t offset,
15905         int             *disposition,
15906         int             *ref_count)
15907 {
15908         kern_return_t                   kr;
15909         vm_page_info_basic_data_t       info;
15910         mach_msg_type_number_t          count;
15911
              /*
               * Start from an all-zero record: the range query can report
               * success without storing anything (e.g. when the rounded
               * range turns out empty), and uninitialized stack contents
               * must never be copied out to the caller.
               */
              bzero(&info, sizeof(info));

15912         count = VM_PAGE_INFO_BASIC_COUNT;
15913         kr = vm_map_page_info(target_map,
15914                               offset,
15915                               VM_PAGE_INFO_BASIC,
15916                               (vm_page_info_t) &info,
15917                               &count);
15918         if (kr == KERN_SUCCESS) {
15919                 *disposition = info.disposition;
15920                 *ref_count = info.ref_count;
15921         } else {
15922                 *disposition = 0;
15923                 *ref_count = 0;
15924         }
15925
15926         return kr;
15927 }
15928
      /*
       *      vm_map_page_info:
       *
       *      Single-address front end for vm_map_page_range_info_internal():
       *      queries the range ["offset", "offset" + 1) and returns that
       *      routine's result.  "flavor", "info" and "count" are passed
       *      through unchanged.
       */
15929 kern_return_t
15930 vm_map_page_info(
15931         vm_map_t                map,
15932         vm_map_offset_t         offset,
15933         vm_page_info_flavor_t   flavor,
15934         vm_page_info_t          info,
15935         mach_msg_type_number_t  *count)
15936 {
15937         /* the callee rounds the end of the range up to a page boundary */
15938         vm_map_offset_t range_start = offset;
15939         vm_map_offset_t range_end = offset + 1;
15940
15941         return vm_map_page_range_info_internal(map, range_start, range_end,
15942                                                flavor, info, count);
15943 }
15944
      /*
       *      vm_map_page_range_info_internal:
       *
       *      Fill "info" with one vm_page_info_basic record per page of the
       *      range [start_offset, end_offset) of "map", after truncating the
       *      start and rounding the end to PAGE_MASK boundaries.
       *
       *      Only the VM_PAGE_INFO_BASIC flavor is accepted, and "*count"
       *      must be VM_PAGE_INFO_BASIC_COUNT (or one less, for backwards
       *      binary compatibility).
       *
       *      Pages that fall in a hole, outside the map's bounds, or in an
       *      entry with no VM object get an all-zero record.  Submap entries
       *      are handled by recursing into the submap.  For everything else
       *      the object's shadow chain is searched for the page and the
       *      record describes the object at which the search stopped.
       *
       *      Records are stored at consecutive indices of "info"; nothing
       *      here checks them against the size of the caller's buffer other
       *      than the assert on MAX_PAGE_RANGE_QUERY.
       *
       *      Returns KERN_INVALID_ARGUMENT for a bad flavor or count, or for
       *      a range whose rounded end lies below its start (address
       *      wrap-around).  Otherwise returns "retval", which is
       *      KERN_SUCCESS unless a recursive submap query said otherwise.
       */
15945 kern_return_t
15946 vm_map_page_range_info_internal(
15947         vm_map_t                map,
15948         vm_map_offset_t         start_offset,
15949         vm_map_offset_t         end_offset,
15950         vm_page_info_flavor_t   flavor,
15951         vm_page_info_t          info,
15952         mach_msg_type_number_t  *count)
15953 {
15954         vm_map_entry_t          map_entry = VM_MAP_ENTRY_NULL;
15955         vm_object_t             object = VM_OBJECT_NULL, curr_object = VM_OBJECT_NULL;
15956         vm_page_t               m = VM_PAGE_NULL;
15957         kern_return_t           retval = KERN_SUCCESS;
15958         int                     disposition = 0;
15959         int                     ref_count = 0;
15960         int                     depth = 0, info_idx = 0;
15961         vm_page_info_basic_t    basic_info = 0;
15962         vm_map_offset_t         offset_in_page = 0, offset_in_object = 0, curr_offset_in_object = 0;
15963         vm_map_offset_t         start = 0, end = 0, curr_s_offset = 0, curr_e_offset = 0;
15964
15965         switch (flavor) {
15966         case VM_PAGE_INFO_BASIC:
15967                 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
15968                         /*
15969                          * The "vm_page_info_basic_data" structure was not
15970                          * properly padded, so allow the size to be off by
15971                          * one to maintain backwards binary compatibility...
15972                          */
15973                         if (*count != VM_PAGE_INFO_BASIC_COUNT - 1)
15974                                 return KERN_INVALID_ARGUMENT;
15975                 }
15976                 break;
15977         default:
15978                 return KERN_INVALID_ARGUMENT;
15979         }
15980
15981         disposition = 0;
15982         ref_count = 0;
15983         depth = 0;
15984         info_idx = 0; /* Tracks the next index within the info structure to be filled.*/
15985         retval = KERN_SUCCESS;
15986
15987         offset_in_page = start_offset & PAGE_MASK;
15988         start = vm_map_trunc_page(start_offset, PAGE_MASK);
15989         end = vm_map_round_page(end_offset, PAGE_MASK);
15990
              /*
               * An "end_offset" near the top of the address space (or an
               * "offset + 1" that already wrapped in a caller) leaves "end"
               * below "start".  The loop below would then run zero times and
               * report success without storing anything in "info", so refuse
               * such a range instead.
               */
              if (end < start)
                      return KERN_INVALID_ARGUMENT;

15991         assert ((end - start) <= MAX_PAGE_RANGE_QUERY);
15992
15993         vm_map_lock_read(map);
15994
15995         for (curr_s_offset = start; curr_s_offset < end;) {
15996                 /*
15997                  * New lookup needs reset of these variables.
15998                  */
15999                 curr_object = object = VM_OBJECT_NULL;
16000                 offset_in_object = 0;
16001                 ref_count = 0;
16002                 depth = 0;
16003
16004                 /*
16005                  * First, find the map entry covering "curr_s_offset", going down
16006                  * submaps if necessary.
16007                  */
16008                 if (!vm_map_lookup_entry(map, curr_s_offset, &map_entry)) {
16009                         /* no entry -> no object -> no page */
16010
16011                         if (curr_s_offset < vm_map_min(map)) {
16012                                 /*
16013                                  * Illegal address that falls below map min.
16014                                  */
16015                                 curr_e_offset = MIN(end, vm_map_min(map));
16016
16017                         } else if (curr_s_offset >= vm_map_max(map)) {
16018                                 /*
16019                                  * Illegal address that falls on/after map max.
16020                                  */
16021                                 curr_e_offset = end;
16022
16023                         } else if (map_entry == vm_map_to_entry(map)) {
16024                                 /*
16025                                  * Hit a hole.
16026                                  */
16027                                 if (map_entry->vme_next == vm_map_to_entry(map)) {
16028                                         /*
16029                                          * Empty map.
16030                                          */
16031                                         curr_e_offset = MIN(map->max_offset, end);
16032                                 } else {
16033                                         /*
16034                                          * Hole at start of the map.
16035                                          */
16036                                         curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
16037                                 }
16038                         } else {
16039                                 if (map_entry->vme_next == vm_map_to_entry(map)) {
16040                                         /*
16041                                          * Hole at the end of the map.
16042                                          */
16043                                         curr_e_offset = MIN(map->max_offset, end);
16044                                 } else {
                                              /*
                                               * Hole between two entries.
                                               */
16045                                         curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
16046                                 }
16047                         }
16048
16049                         assert(curr_e_offset >= curr_s_offset);
16050
                              /* Every page of the unmapped stretch gets a zeroed record. */
16051                         uint64_t num_pages = (curr_e_offset - curr_s_offset) >> PAGE_SHIFT;
16052
16053                         void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
16054
16055                         bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
16056
16057                         curr_s_offset = curr_e_offset;
16058
16059                         info_idx += num_pages;
16060
16061                         continue;
16062                 }
16063
16064                 /* compute offset from this map entry's start */
16065                 offset_in_object = curr_s_offset - map_entry->vme_start;
16066
16067                 /* compute offset into this map entry's object (or submap) */
16068                 offset_in_object += VME_OFFSET(map_entry);
16069
16070                 if (map_entry->is_sub_map) {
16071                         vm_map_t sub_map = VM_MAP_NULL;
16072                         vm_page_info_t submap_info = 0;
16073                         vm_map_offset_t submap_s_offset = 0, submap_e_offset = 0, range_len = 0;
16074
16075                         range_len = MIN(map_entry->vme_end, end) - curr_s_offset;
16076
16077                         submap_s_offset = offset_in_object;
16078                         submap_e_offset = submap_s_offset + range_len;
16079
16080                         sub_map = VME_SUBMAP(map_entry);
16081
                              /*
                               * Hold a reference on the submap across the recursive
                               * call, which runs with the parent map unlocked.
                               */
16082                         vm_map_reference(sub_map);
16083                         vm_map_unlock_read(map);
16084
16085                         submap_info = (vm_page_info_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
16086
16087                         retval = vm_map_page_range_info_internal(sub_map,
16088                                               submap_s_offset,
16089                                               submap_e_offset,
16090                                               VM_PAGE_INFO_BASIC,
16091                                               (vm_page_info_t) submap_info,
16092                                               count);
16093
16094                         assert(retval == KERN_SUCCESS);
16095
16096                         vm_map_lock_read(map);
16097                         vm_map_deallocate(sub_map);
16098
16099                         /* Move the "info" index by the number of pages we inspected.*/
16100                         info_idx += range_len >> PAGE_SHIFT;
16101
16102                         /* Move our current offset by the size of the range we inspected.*/
16103                         curr_s_offset += range_len;
16104
16105                         continue;
16106                 }
16107
16108                 object = VME_OBJECT(map_entry);
16109                 if (object == VM_OBJECT_NULL) {
16110
16111                         /*
16112                          * We don't have an object here and, hence,
16113                          * no pages to inspect. We'll fill up the
16114                          * info structure appropriately.
16115                          */
16116
16117                         curr_e_offset = MIN(map_entry->vme_end, end);
16118
16119                         uint64_t num_pages = (curr_e_offset - curr_s_offset) >> PAGE_SHIFT;
16120
16121                         void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
16122
16123                         bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
16124
16125                         curr_s_offset = curr_e_offset;
16126
16127                         info_idx += num_pages;
16128
16129                         continue;
16130                 }
16131
16132                 vm_object_reference(object);
16133                 /*
16134                  * Shared mode -- so we can allow other readers
16135                  * to grab the lock too.
16136                  */
16137                 vm_object_lock_shared(object);
16138
16139                 curr_e_offset = MIN(map_entry->vme_end, end);
16140
16141                 vm_map_unlock_read(map);
16142
16143                 map_entry = NULL; /* map is unlocked, the entry is no longer valid. */
16144
16145                 curr_object = object;
16146
16147                 for (; curr_s_offset < curr_e_offset;) {
16148
16149                         if (object == curr_object) {
16150                                 ref_count = curr_object->ref_count - 1; /* account for our object reference above. */
16151                         } else {
16152                                 ref_count = curr_object->ref_count;
16153                         }
16154
                              /*
                               * "depth" is reported per page, so restart the
                               * shadow-chain hop count for this page instead of
                               * carrying over the hops made for the previous one.
                               */
                              depth = 0;

16155                         curr_offset_in_object = offset_in_object;
16156
16157                         for (;;) {
16158                                 m = vm_page_lookup(curr_object, curr_offset_in_object);
16159
16160                                 if (m != VM_PAGE_NULL) {
16161
16162                                         disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
16163                                         break;
16164
16165                                 } else {
16166                                         if (curr_object->internal &&
16167                                             curr_object->alive &&
16168                                             !curr_object->terminating &&
16169                                             curr_object->pager_ready) {
16170
16171                                                 if (VM_COMPRESSOR_PAGER_STATE_GET(curr_object, curr_offset_in_object)
16172                                                     == VM_EXTERNAL_STATE_EXISTS) {
16173                                                         /* the pager has that page */
16174                                                         disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
16175                                                         break;
16176                                                 }
16177                                         }
16178
16179                                         /*
16180                                          * Go down the VM object shadow chain until we find the page
16181                                          * we're looking for.
16182                                          */
16183
16184                                         if (curr_object->shadow != VM_OBJECT_NULL) {
16185                                                 vm_object_t shadow = VM_OBJECT_NULL;
16186
16187                                                 curr_offset_in_object += curr_object->vo_shadow_offset;
16188                                                 shadow = curr_object->shadow;
16189
                                                      /* lock the shadow before letting go of the current object */
16190                                                 vm_object_lock_shared(shadow);
16191                                                 vm_object_unlock(curr_object);
16192
16193                                                 curr_object = shadow;
16194                                                 depth++;
16195                                                 continue;
16196                                         } else {
16197
16198                                                 break;
16199                                         }
16200                                 }
16201                         }
16202
16203                         /* The ref_count is not strictly accurate, it measures the number   */
16204                         /* of entities holding a ref on the object, they may not be mapping */
16205                         /* the object or may not be mapping the section holding the         */
16206                         /* target page but its still a ball park number and though an over- */
16207                         /* count, it picks up the copy-on-write cases                       */
16208
16209                         /* We could also get a picture of page sharing from pmap_attributes */
16210                         /* but this would under count as only faulted-in mappings would     */
16211                         /* show up.                                                         */
16212
16213                         if ((curr_object == object) && curr_object->shadow)
16214                                 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
16215
16216                         if (! curr_object->internal)
16217                                 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
16218
16219                         if (m != VM_PAGE_NULL) {
16220
16221                                 if (m->fictitious) {
16222
16223                                         disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
16224
16225                                 } else {
16226                                         if (m->dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m)))
16227                                                 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
16228
16229                                         if (m->reference || pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m)))
16230                                                 disposition |= VM_PAGE_QUERY_PAGE_REF;
16231
16232                                         if (m->vm_page_q_state == VM_PAGE_ON_SPECULATIVE_Q)
16233                                                 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
16234
16235                                         if (m->cs_validated)
16236                                                 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
16237                                         if (m->cs_tainted)
16238                                                 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
16239                                         if (m->cs_nx)
16240                                                 disposition |= VM_PAGE_QUERY_PAGE_CS_NX;
16241                                 }
16242                         }
16243
16244                         switch (flavor) {
16245                         case VM_PAGE_INFO_BASIC:
16246                                 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
16247                                 basic_info->disposition = disposition;
16248                                 basic_info->ref_count = ref_count;
16249                                 basic_info->object_id = (vm_object_id_t) (uintptr_t)
16250                                         VM_KERNEL_ADDRPERM(curr_object);
16251                                 basic_info->offset =
16252                                         (memory_object_offset_t) curr_offset_in_object + offset_in_page;
16253                                 basic_info->depth = depth;
16254
16255                                 info_idx++;
16256                                 break;
16257                         }
16258
16259                         disposition = 0;
16260                         offset_in_page = 0; // This doesn't really make sense for any offset other than the starting offset.
16261
16262                         /*
16263                          * Move to next offset in the range and in our object.
16264                          */
16265                         curr_s_offset += PAGE_SIZE;
16266                         offset_in_object += PAGE_SIZE;
16267                         curr_offset_in_object = offset_in_object;
16268
                              /*
                               * The next page is looked up from the entry's own
                               * object again, so return to it if the search above
                               * ended somewhere down the shadow chain.
                               */
16269                         if (curr_object != object) {
16270
16271                                 vm_object_unlock(curr_object);
16272
16273                                 curr_object = object;
16274
16275                                 vm_object_lock_shared(curr_object);
16276                         } else {
16277
16278                                 vm_object_lock_yield_shared(curr_object);
16279                         }
16280                 }
16281
16282                 vm_object_unlock(curr_object);
16283                 vm_object_deallocate(curr_object);
16284
16285                 vm_map_lock_read(map);
16286         }
16287
16288         vm_map_unlock_read(map);
16289         return retval;
16290 }
16291
16292 /*
16293  *      vm_map_msync
16294  *
16295  *      Synchronises the memory range specified with its backing store
16296  *      image by either flushing or cleaning the contents to the appropriate
16297  *      memory manager engaging in a memory object synchronize dialog with
16298  *      the manager.  The client doesn't return until the manager issues
16299  *      m_o_s_completed message.  MIG Magically converts user task parameter
16300  *      to the task's address map.
16301  *
16302  *      interpretation of sync_flags
16303  *      VM_SYNC_INVALIDATE      - discard pages, only return precious
16304  *                                pages to manager.
16305  *
16306  *      VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
16307  *                              - discard pages, write dirty or precious
16308  *                                pages back to memory manager.
16309  *
16310  *      VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
16311  *                              - write dirty or precious pages back to
16312  *                                the memory manager.
16313  *
16314  *      VM_SYNC_CONTIGUOUS      - does everything normally, but if there
16315  *                                is a hole in the region, and we would
16316  *                                have returned KERN_SUCCESS, return
16317  *                                KERN_INVALID_ADDRESS instead.
16318  *
16319  *      NOTE
16320  *      The memory object attributes have not yet been implemented, this
16321  *      function will have to deal with the invalidate attribute
16322  *
16323  *      RETURNS
16324  *      KERN_INVALID_TASK               Bad task parameter
16325  *      KERN_INVALID_ARGUMENT           both sync and async were specified.
16326  *      KERN_SUCCESS                    The usual.
16327  *      KERN_INVALID_ADDRESS            There was a hole in the region.
16328  */
16329
16330 kern_return_t
16331 vm_map_msync(
16332         vm_map_t                map,
16333         vm_map_address_t        address,
16334         vm_map_size_t           size,
16335         vm_sync_t               sync_flags)
16336 {
16337         vm_map_entry_t          entry;
16338         vm_map_size_t           amount_left;
16339         vm_object_offset_t      offset;
16340         boolean_t               do_sync_req;
16341         boolean_t               had_hole = FALSE;
16342         vm_map_offset_t         pmap_offset;
16343
16344         if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
16345             (sync_flags & VM_SYNC_SYNCHRONOUS))
16346                 return(KERN_INVALID_ARGUMENT);
16347
16348         /*
16349          * align address and size on page boundaries
16350          */
16351         size = (vm_map_round_page(address + size,
16352                                   VM_MAP_PAGE_MASK(map)) -
16353                 vm_map_trunc_page(address,
16354                                   VM_MAP_PAGE_MASK(map)));
16355         address = vm_map_trunc_page(address,
16356                                     VM_MAP_PAGE_MASK(map));
16357
16358         if (map == VM_MAP_NULL)
16359                 return(KERN_INVALID_TASK);
16360
16361         if (size == 0)
16362                 return(KERN_SUCCESS);
16363
16364         amount_left = size;
16365
16366         while (amount_left > 0) {
16367                 vm_object_size_t        flush_size;
16368                 vm_object_t             object;
16369
16370                 vm_map_lock(map);
16371                 if (!vm_map_lookup_entry(map,
16372                                          address,
16373                                          &entry)) {
16374
16375                         vm_map_size_t   skip;
16376
16377                         /*
16378                          * hole in the address map.
16379                          */
16380                         had_hole = TRUE;
16381
16382                         if (sync_flags & VM_SYNC_KILLPAGES) {
16383                                 /*
16384                                  * For VM_SYNC_KILLPAGES, there should be
16385                                  * no holes in the range, since we couldn't
16386                                  * prevent someone else from allocating in
16387                                  * that hole and we wouldn't want to "kill"
16388                                  * their pages.
16389                                  */
16390                                 vm_map_unlock(map);
16391                                 break;
16392                         }
16393
16394                         /*
16395                          * Check for empty map.
16396                          */
16397                         if (entry == vm_map_to_entry(map) &&
16398                             entry->vme_next == entry) {
16399                                 vm_map_unlock(map);
16400                                 break;
16401                         }
16402                         /*
16403                          * Check that we don't wrap and that
16404                          * we have at least one real map entry.
16405                          */
16406                         if ((map->hdr.nentries == 0) ||
16407                             (entry->vme_next->vme_start < address)) {
16408                                 vm_map_unlock(map);
16409                                 break;
16410                         }
16411                         /*
16412                          * Move up to the next entry if needed
16413                          */
16414                         skip = (entry->vme_next->vme_start - address);
16415                         if (skip >= amount_left)
16416                                 amount_left = 0;
16417                         else
16418                                 amount_left -= skip;
16419                         address = entry->vme_next->vme_start;
16420                         vm_map_unlock(map);
16421                         continue;
16422                 }
16423
16424                 offset = address - entry->vme_start;
16425                 pmap_offset = address;
16426
16427                 /*
16428                  * do we have more to flush than is contained in this
16429                  * entry ?
16430                  */
16431                 if (amount_left + entry->vme_start + offset > entry->vme_end) {
16432                         flush_size = entry->vme_end -
16433                                 (entry->vme_start + offset);
16434                 } else {
16435                         flush_size = amount_left;
16436                 }
16437                 amount_left -= flush_size;
16438                 address += flush_size;
16439
16440                 if (entry->is_sub_map == TRUE) {
16441                         vm_map_t        local_map;
16442                         vm_map_offset_t local_offset;
16443
16444                         local_map = VME_SUBMAP(entry);
16445                         local_offset = VME_OFFSET(entry);
16446                         vm_map_unlock(map);
16447                         if (vm_map_msync(
16448                                     local_map,
16449                                     local_offset,
16450                                     flush_size,
16451                                     sync_flags) == KERN_INVALID_ADDRESS) {
16452                                 had_hole = TRUE;
16453                         }
16454                         continue;
16455                 }
16456                 object = VME_OBJECT(entry);
16457
16458                 /*
16459                  * We can't sync this object if the object has not been
16460                  * created yet
16461                  */
16462                 if (object == VM_OBJECT_NULL) {
16463                         vm_map_unlock(map);
16464                         continue;
16465                 }
16466                 offset += VME_OFFSET(entry);
16467
16468                 vm_object_lock(object);
16469
16470                 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
16471                         int kill_pages = 0;
16472                         boolean_t reusable_pages = FALSE;
16473
16474                         if (sync_flags & VM_SYNC_KILLPAGES) {
16475                                 if (((object->ref_count == 1) ||
16476                                      ((object->copy_strategy !=
16477                                        MEMORY_OBJECT_COPY_SYMMETRIC) &&
16478                                       (object->copy == VM_OBJECT_NULL))) &&
16479                                     (object->shadow == VM_OBJECT_NULL)) {
16480                                         if (object->ref_count != 1) {
16481                                                 vm_page_stats_reusable.free_shared++;
16482                                         }
16483                                         kill_pages = 1;
16484                                 } else {
16485                                         kill_pages = -1;
16486                                 }
16487                         }
16488                         if (kill_pages != -1)
16489                                 vm_object_deactivate_pages(
16490                                         object,
16491                                         offset,
16492                                         (vm_object_size_t) flush_size,
16493                                         kill_pages,
16494                                         reusable_pages,
16495                                         map->pmap,
16496                                         pmap_offset);
16497                         vm_object_unlock(object);
16498                         vm_map_unlock(map);
16499                         continue;
16500                 }
16501                 /*
16502                  * We can't sync this object if there isn't a pager.
16503                  * Don't bother to sync internal objects, since there can't
16504                  * be any "permanent" storage for these objects anyway.
16505                  */
16506                 if ((object->pager == MEMORY_OBJECT_NULL) ||
16507                     (object->internal) || (object->private)) {
16508                         vm_object_unlock(object);
16509                         vm_map_unlock(map);
16510                         continue;
16511                 }
16512                 /*
16513                  * keep reference on the object until syncing is done
16514                  */
16515                 vm_object_reference_locked(object);
16516                 vm_object_unlock(object);
16517
16518                 vm_map_unlock(map);
16519
16520                 do_sync_req = vm_object_sync(object,
16521                                              offset,
16522                                              flush_size,
16523                                              sync_flags & VM_SYNC_INVALIDATE,
16524                                              ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
16525                                               (sync_flags & VM_SYNC_ASYNCHRONOUS)),
16526                                              sync_flags & VM_SYNC_SYNCHRONOUS);
16527
16528                 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
16529                         /*
16530                          * clear out the clustering and read-ahead hints
16531                          */
16532                         vm_object_lock(object);
16533
16534                         object->pages_created = 0;
16535                         object->pages_used = 0;
16536                         object->sequential = 0;
16537                         object->last_alloc = 0;
16538
16539                         vm_object_unlock(object);
16540                 }
16541                 vm_object_deallocate(object);
16542         } /* while */
16543
16544         /* for proper msync() behaviour */
16545         if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS))
16546                 return(KERN_INVALID_ADDRESS);
16547
16548         return(KERN_SUCCESS);
16549 }/* vm_msync */
16550
16551 /*
16552  *      Routine:        convert_port_entry_to_map
16553  *      Purpose:
16554  *              Convert from a port specifying an entry or a task
16555  *              to a map. Doesn't consume the port ref; produces a map ref,
16556  *              which may be null.  Unlike convert_port_to_map, the
16557  *              port may be task or a named entry backed.
16558  *      Conditions:
16559  *              Nothing locked.
16560  */
16561
16562
16563 vm_map_t
16564 convert_port_entry_to_map(
16565         ipc_port_t      port)
16566 {
16567         vm_map_t map;
16568         vm_named_entry_t        named_entry;
16569         uint32_t        try_failed_count = 0;
16570
16571         if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
16572                 while(TRUE) {
16573                         ip_lock(port);
16574                         if(ip_active(port) && (ip_kotype(port)
16575                                                == IKOT_NAMED_ENTRY)) {
16576                                 named_entry =
16577                                         (vm_named_entry_t)port->ip_kobject;
16578                                 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
16579                                         ip_unlock(port);
16580
16581                                         try_failed_count++;
16582                                         mutex_pause(try_failed_count);
16583                                         continue;
16584                                 }
16585                                 named_entry->ref_count++;
16586                                 lck_mtx_unlock(&(named_entry)->Lock);
16587                                 ip_unlock(port);
16588                                 if ((named_entry->is_sub_map) &&
16589                                     (named_entry->protection
16590                                      & VM_PROT_WRITE)) {
16591                                         map = named_entry->backing.map;
16592                                 } else {
16593                                         mach_destroy_memory_entry(port);
16594                                         return VM_MAP_NULL;
16595                                 }
16596                                 vm_map_reference_swap(map);
16597                                 mach_destroy_memory_entry(port);
16598                                 break;
16599                         }
16600                         else
16601                                 return VM_MAP_NULL;
16602                 }
16603         }
16604         else
16605                 map = convert_port_to_map(port);
16606
16607         return map;
16608 }
16609
16610 /*
16611  *      Routine:        convert_port_entry_to_object
16612  *      Purpose:
16613  *              Convert from a port specifying a named entry to an
 *		object. Doesn't consume the port ref; produces an object ref,
16615  *              which may be null.
16616  *      Conditions:
16617  *              Nothing locked.
16618  */
16619
16620
16621 vm_object_t
16622 convert_port_entry_to_object(
16623         ipc_port_t      port)
16624 {
16625         vm_object_t             object = VM_OBJECT_NULL;
16626         vm_named_entry_t        named_entry;
16627         uint32_t                try_failed_count = 0;
16628
16629         if (IP_VALID(port) &&
16630             (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
16631         try_again:
16632                 ip_lock(port);
16633                 if (ip_active(port) &&
16634                     (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
16635                         named_entry = (vm_named_entry_t)port->ip_kobject;
16636                         if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
16637                                 ip_unlock(port);
16638                                 try_failed_count++;
16639                                 mutex_pause(try_failed_count);
16640                                 goto try_again;
16641                         }
16642                         named_entry->ref_count++;
16643                         lck_mtx_unlock(&(named_entry)->Lock);
16644                         ip_unlock(port);
16645                         if (!(named_entry->is_sub_map) &&
16646                             !(named_entry->is_copy) &&
16647                             (named_entry->protection & VM_PROT_WRITE)) {
16648                                 object = named_entry->backing.object;
16649                                 vm_object_reference(object);
16650                         }
16651                         mach_destroy_memory_entry(port);
16652                 }
16653         }
16654
16655         return object;
16656 }
16657
16658 /*
16659  * Export routines to other components for the things we access locally through
16660  * macros.
16661  */
16662 #undef current_map
16663 vm_map_t
16664 current_map(void)
16665 {
16666         return (current_map_fast());
16667 }
16668
16669 /*
16670  *      vm_map_reference:
16671  *
16672  *      Most code internal to the osfmk will go through a
16673  *      macro defining this.  This is always here for the
16674  *      use of other kernel components.
16675  */
16676 #undef vm_map_reference
16677 void
16678 vm_map_reference(
16679         vm_map_t        map)
16680 {
16681         if (map == VM_MAP_NULL)
16682                 return;
16683
16684         lck_mtx_lock(&map->s_lock);
16685 #if     TASK_SWAPPER
16686         assert(map->res_count > 0);
16687         assert(map->ref_count >= map->res_count);
16688         map->res_count++;
16689 #endif
16690         map->ref_count++;
16691         lck_mtx_unlock(&map->s_lock);
16692 }
16693
16694 /*
16695  *      vm_map_deallocate:
16696  *
16697  *      Removes a reference from the specified map,
16698  *      destroying it if no references remain.
16699  *      The map should not be locked.
16700  */
16701 void
16702 vm_map_deallocate(
16703         vm_map_t        map)
16704 {
16705         unsigned int            ref;
16706
16707         if (map == VM_MAP_NULL)
16708                 return;
16709
16710         lck_mtx_lock(&map->s_lock);
16711         ref = --map->ref_count;
16712         if (ref > 0) {
16713                 vm_map_res_deallocate(map);
16714                 lck_mtx_unlock(&map->s_lock);
16715                 return;
16716         }
16717         assert(map->ref_count == 0);
16718         lck_mtx_unlock(&map->s_lock);
16719
16720 #if     TASK_SWAPPER
16721         /*
16722          * The map residence count isn't decremented here because
16723          * the vm_map_delete below will traverse the entire map,
16724          * deleting entries, and the residence counts on objects
16725          * and sharing maps will go away then.
16726          */
16727 #endif
16728
16729         vm_map_destroy(map, VM_MAP_NO_FLAGS);
16730 }
16731
16732
16733 void
16734 vm_map_disable_NX(vm_map_t map)
16735 {
16736         if (map == NULL)
16737                 return;
16738         if (map->pmap == NULL)
16739                 return;
16740
16741         pmap_disable_NX(map->pmap);
16742 }
16743
16744 void
16745 vm_map_disallow_data_exec(vm_map_t map)
16746 {
16747     if (map == NULL)
16748         return;
16749
16750     map->map_disallow_data_exec = TRUE;
16751 }
16752
16753 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
16754  * more descriptive.
16755  */
16756 void
16757 vm_map_set_32bit(vm_map_t map)
16758 {
16759 #if defined(__arm__) || defined(__arm64__)
16760         map->max_offset = pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_DEVICE);
16761 #else
16762         map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
16763 #endif
16764 }
16765
16766
16767 void
16768 vm_map_set_64bit(vm_map_t map)
16769 {
16770 #if defined(__arm__) || defined(__arm64__)
16771         map->max_offset = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE);
16772 #else
16773         map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
16774 #endif
16775 }
16776
16777 /*
16778  * Expand the maximum size of an existing map.
16779  */
16780 void
16781 vm_map_set_jumbo(vm_map_t map)
16782 {
16783 #if defined (__arm64__)
16784         vm_map_offset_t old_max_offset = map->max_offset;
16785         map->max_offset = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_JUMBO);
16786         if (map->holes_list->prev->vme_end == pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE)) {
16787                 /*
16788                  * There is already a hole at the end of the map; simply make it bigger.
16789                  */
16790                 map->holes_list->prev->vme_end = map->max_offset;
16791         } else {
16792                 /*
16793                  * There is no hole at the end, so we need to create a new hole
16794                  * for the new empty space we're creating.
16795                  */
16796                 struct vm_map_links *new_hole = zalloc(vm_map_holes_zone);
16797                 new_hole->start = old_max_offset;
16798                 new_hole->end = map->max_offset;
16799                 new_hole->prev = map->holes_list->prev;
16800                 new_hole->next = (struct vm_map_entry *)map->holes_list;
16801                 map->holes_list->prev->links.next = (struct vm_map_entry *)new_hole;
16802                 map->holes_list->prev = (struct vm_map_entry *)new_hole;
16803         }
16804 #else /* arm64 */
16805         (void) map;
16806 #endif
16807 }
16808
16809 vm_map_offset_t
16810 vm_compute_max_offset(boolean_t is64)
16811 {
16812 #if defined(__arm__) || defined(__arm64__)
16813         return (pmap_max_offset(is64, ARM_PMAP_MAX_OFFSET_DEVICE));
16814 #else
16815         return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
16816 #endif
16817 }
16818
16819 void
16820 vm_map_get_max_aslr_slide_section(
16821                 vm_map_t                map __unused,
16822                 int64_t                 *max_sections,
16823                 int64_t                 *section_size)
16824 {
16825 #if defined(__arm64__)
16826         *max_sections = 3;
16827         *section_size = ARM_TT_TWIG_SIZE;
16828 #else
16829         *max_sections = 1;
16830         *section_size = 0;
16831 #endif
16832 }
16833
16834 uint64_t
16835 vm_map_get_max_aslr_slide_pages(vm_map_t map)
16836 {
16837 #if defined(__arm64__)
16838         /* Limit arm64 slide to 16MB to conserve contiguous VA space in the more
16839          * limited embedded address space; this is also meant to minimize pmap
16840          * memory usage on 16KB page systems.
16841          */
16842         return (1 << (24 - VM_MAP_PAGE_SHIFT(map)));
16843 #else
16844         return (1 << (vm_map_is_64bit(map) ? 16 : 8));
16845 #endif
16846 }
16847
16848 uint64_t
16849 vm_map_get_max_loader_aslr_slide_pages(vm_map_t map)
16850 {
16851 #if defined(__arm64__)
16852         /* We limit the loader slide to 4MB, in order to ensure at least 8 bits
16853          * of independent entropy on 16KB page systems.
16854          */
16855         return (1 << (22 - VM_MAP_PAGE_SHIFT(map)));
16856 #else
16857         return (1 << (vm_map_is_64bit(map) ? 16 : 8));
16858 #endif
16859 }
16860
16861 #ifndef __arm__
16862 boolean_t
16863 vm_map_is_64bit(
16864                 vm_map_t map)
16865 {
16866         return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
16867 }
16868 #endif
16869
16870 boolean_t
16871 vm_map_has_hard_pagezero(
16872                 vm_map_t        map,
16873                 vm_map_offset_t pagezero_size)
16874 {
16875         /*
16876          * XXX FBDP
16877          * We should lock the VM map (for read) here but we can get away
16878          * with it for now because there can't really be any race condition:
16879          * the VM map's min_offset is changed only when the VM map is created
16880          * and when the zero page is established (when the binary gets loaded),
16881          * and this routine gets called only when the task terminates and the
16882          * VM map is being torn down, and when a new map is created via
16883          * load_machfile()/execve().
16884          */
16885         return (map->min_offset >= pagezero_size);
16886 }
16887
16888 /*
 * Raise a VM map's maximum offset.
16890  */
16891 kern_return_t
16892 vm_map_raise_max_offset(
16893         vm_map_t        map,
16894         vm_map_offset_t new_max_offset)
16895 {
16896         kern_return_t   ret;
16897
16898         vm_map_lock(map);
16899         ret = KERN_INVALID_ADDRESS;
16900
16901         if (new_max_offset >= map->max_offset) {
16902                 if (!vm_map_is_64bit(map)) {
16903                         if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
16904                                 map->max_offset = new_max_offset;
16905                                 ret = KERN_SUCCESS;
16906                         }
16907                 } else {
16908                         if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
16909                                 map->max_offset = new_max_offset;
16910                                 ret = KERN_SUCCESS;
16911                         }
16912                 }
16913         }
16914
16915         vm_map_unlock(map);
16916         return ret;
16917 }
16918
16919
16920 /*
16921  * Raise a VM map's minimum offset.
16922  * To strictly enforce "page zero" reservation.
16923  */
16924 kern_return_t
16925 vm_map_raise_min_offset(
16926         vm_map_t        map,
16927         vm_map_offset_t new_min_offset)
16928 {
16929         vm_map_entry_t  first_entry;
16930
16931         new_min_offset = vm_map_round_page(new_min_offset,
16932                                            VM_MAP_PAGE_MASK(map));
16933
16934         vm_map_lock(map);
16935
16936         if (new_min_offset < map->min_offset) {
16937                 /*
16938                  * Can't move min_offset backwards, as that would expose
16939                  * a part of the address space that was previously, and for
16940                  * possibly good reasons, inaccessible.
16941                  */
16942                 vm_map_unlock(map);
16943                 return KERN_INVALID_ADDRESS;
16944         }
16945         if (new_min_offset >= map->max_offset) {
16946                 /* can't go beyond the end of the address space */
16947                 vm_map_unlock(map);
16948                 return KERN_INVALID_ADDRESS;
16949         }
16950
16951         first_entry = vm_map_first_entry(map);
16952         if (first_entry != vm_map_to_entry(map) &&
16953             first_entry->vme_start < new_min_offset) {
16954                 /*
16955                  * Some memory was already allocated below the new
16956                  * minimun offset.  It's too late to change it now...
16957                  */
16958                 vm_map_unlock(map);
16959                 return KERN_NO_SPACE;
16960         }
16961
16962         map->min_offset = new_min_offset;
16963
16964         assert(map->holes_list);
16965         map->holes_list->start = new_min_offset;
16966         assert(new_min_offset < map->holes_list->end);
16967
16968         vm_map_unlock(map);
16969
16970         return KERN_SUCCESS;
16971 }
16972
16973 /*
16974  * Set the limit on the maximum amount of user wired memory allowed for this map.
16975  * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
16976  * the kernel.  The limits are checked in the mach VM side, so we keep a copy so we
16977  * don't have to reach over to the BSD data structures.
16978  */
16979
16980 void
16981 vm_map_set_user_wire_limit(vm_map_t     map,
16982                            vm_size_t    limit)
16983 {
16984         map->user_wire_limit = limit;
16985 }
16986
16987
16988 void vm_map_switch_protect(vm_map_t     map,
16989                            boolean_t    val)
16990 {
16991         vm_map_lock(map);
16992         map->switch_protect=val;
16993         vm_map_unlock(map);
16994 }
16995
16996 /*
16997  * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
16998  * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
16999  * bump both counters.
17000  */
17001 void
17002 vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
17003 {
17004         pmap_t pmap = vm_map_pmap(map);
17005
17006         ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
17007         ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
17008 }
17009
17010 void
17011 vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
17012 {
17013         pmap_t pmap = vm_map_pmap(map);
17014
17015         ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
17016         ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
17017 }
17018
17019 /* Add (generate) code signature for memory range */
17020 #if CONFIG_DYNAMIC_CODE_SIGNING
17021 kern_return_t vm_map_sign(vm_map_t map,
17022                  vm_map_offset_t start,
17023                  vm_map_offset_t end)
17024 {
17025         vm_map_entry_t entry;
17026         vm_page_t m;
17027         vm_object_t object;
17028
17029         /*
17030          * Vet all the input parameters and current type and state of the
17031          * underlaying object.  Return with an error if anything is amiss.
17032          */
17033         if (map == VM_MAP_NULL)
17034                 return(KERN_INVALID_ARGUMENT);
17035
17036         vm_map_lock_read(map);
17037
17038         if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
17039                 /*
17040                  * Must pass a valid non-submap address.
17041                  */
17042                 vm_map_unlock_read(map);
17043                 return(KERN_INVALID_ADDRESS);
17044         }
17045
17046         if((entry->vme_start > start) || (entry->vme_end < end)) {
17047                 /*
17048                  * Map entry doesn't cover the requested range. Not handling
17049                  * this situation currently.
17050                  */
17051                 vm_map_unlock_read(map);
17052                 return(KERN_INVALID_ARGUMENT);
17053         }
17054
17055         object = VME_OBJECT(entry);
17056         if (object == VM_OBJECT_NULL) {
17057                 /*
17058                  * Object must already be present or we can't sign.
17059                  */
17060                 vm_map_unlock_read(map);
17061                 return KERN_INVALID_ARGUMENT;
17062         }
17063
17064         vm_object_lock(object);
17065         vm_map_unlock_read(map);
17066
17067         while(start < end) {
17068                 uint32_t refmod;
17069
17070                 m = vm_page_lookup(object,
17071                                    start - entry->vme_start + VME_OFFSET(entry));
17072                 if (m==VM_PAGE_NULL) {
17073                         /* shoud we try to fault a page here? we can probably
17074                          * demand it exists and is locked for this request */
17075                         vm_object_unlock(object);
17076                         return KERN_FAILURE;
17077                 }
17078                 /* deal with special page status */
17079                 if (m->busy ||
17080                     (m->unusual && (m->error || m->restart || m->private || m->absent))) {
17081                         vm_object_unlock(object);
17082                         return KERN_FAILURE;
17083                 }
17084
17085                 /* Page is OK... now "validate" it */
17086                 /* This is the place where we'll call out to create a code
17087                  * directory, later */
17088                 m->cs_validated = TRUE;
17089
17090                 /* The page is now "clean" for codesigning purposes. That means
17091                  * we don't consider it as modified (wpmapped) anymore. But
17092                  * we'll disconnect the page so we note any future modification
17093                  * attempts. */
17094                 m->wpmapped = FALSE;
17095                 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
17096
17097                 /* Pull the dirty status from the pmap, since we cleared the
17098                  * wpmapped bit */
17099                 if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
17100                         SET_PAGE_DIRTY(m, FALSE);
17101                 }
17102
17103                 /* On to the next page */
17104                 start += PAGE_SIZE;
17105         }
17106         vm_object_unlock(object);
17107
17108         return KERN_SUCCESS;
17109 }
17110 #endif
17111
17112 kern_return_t vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
17113 {
17114         vm_map_entry_t  entry = VM_MAP_ENTRY_NULL;
17115         vm_map_entry_t next_entry;
17116         kern_return_t   kr = KERN_SUCCESS;
17117         vm_map_t        zap_map;
17118
17119         vm_map_lock(map);
17120
17121         /*
17122          * We use a "zap_map" to avoid having to unlock
17123          * the "map" in vm_map_delete().
17124          */
17125         zap_map = vm_map_create(PMAP_NULL,
17126                                 map->min_offset,
17127                                 map->max_offset,
17128                                 map->hdr.entries_pageable);
17129
17130         if (zap_map == VM_MAP_NULL) {
17131                 return KERN_RESOURCE_SHORTAGE;
17132         }
17133
17134         vm_map_set_page_shift(zap_map,
17135                               VM_MAP_PAGE_SHIFT(map));
17136         vm_map_disable_hole_optimization(zap_map);
17137
17138         for (entry = vm_map_first_entry(map);
17139              entry != vm_map_to_entry(map);
17140              entry = next_entry) {
17141                 next_entry = entry->vme_next;
17142
17143                 if (VME_OBJECT(entry) &&
17144                     !entry->is_sub_map &&
17145                     (VME_OBJECT(entry)->internal == TRUE) &&
17146                     (VME_OBJECT(entry)->ref_count == 1)) {
17147
17148                         *reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
17149                         *reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);
17150
17151                         (void)vm_map_delete(map,
17152                                             entry->vme_start,
17153                                             entry->vme_end,
17154                                             VM_MAP_REMOVE_SAVE_ENTRIES,
17155                                             zap_map);
17156                 }
17157         }
17158
17159         vm_map_unlock(map);
17160
17161         /*
17162          * Get rid of the "zap_maps" and all the map entries that
17163          * they may still contain.
17164          */
17165         if (zap_map != VM_MAP_NULL) {
17166                 vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
17167                 zap_map = VM_MAP_NULL;
17168         }
17169
17170         return kr;
17171 }
17172
17173
17174 #if DEVELOPMENT || DEBUG
17175
17176 int
17177 vm_map_disconnect_page_mappings(
17178         vm_map_t map,
17179         boolean_t do_unnest)
17180 {
17181         vm_map_entry_t entry;
17182         int     page_count = 0;
17183
17184         if (do_unnest == TRUE) {
17185 #ifndef NO_NESTED_PMAP
17186                 vm_map_lock(map);
17187
17188                 for (entry = vm_map_first_entry(map);
17189                      entry != vm_map_to_entry(map);
17190                      entry = entry->vme_next) {
17191
17192                         if (entry->is_sub_map && entry->use_pmap) {
17193                                 /*
17194                                  * Make sure the range between the start of this entry and
17195                                  * the end of this entry is no longer nested, so that
17196                                  * we will only remove mappings from the pmap in use by this
17197                                  * this task
17198                                  */
17199                                 vm_map_clip_unnest(map, entry, entry->vme_start, entry->vme_end);
17200                         }
17201                 }
17202                 vm_map_unlock(map);
17203 #endif
17204         }
17205         vm_map_lock_read(map);
17206
17207         page_count = map->pmap->stats.resident_count;
17208
17209         for (entry = vm_map_first_entry(map);
17210              entry != vm_map_to_entry(map);
17211              entry = entry->vme_next) {
17212
17213                 if (!entry->is_sub_map && ((VME_OBJECT(entry) == 0) ||
17214                                            (VME_OBJECT(entry)->phys_contiguous))) {
17215                         continue;
17216                 }
17217                 if (entry->is_sub_map)
17218                         assert(!entry->use_pmap);
17219
17220                 pmap_remove_options(map->pmap, entry->vme_start, entry->vme_end, 0);
17221         }
17222         vm_map_unlock_read(map);
17223
17224         return page_count;
17225 }
17226
17227 #endif
17228
17229
17230 #if CONFIG_FREEZE
17231
17232
17233 int c_freezer_swapout_count;
17234 int c_freezer_compression_count = 0;
17235 AbsoluteTime c_freezer_last_yield_ts = 0;
17236
17237 kern_return_t vm_map_freeze(
17238                 vm_map_t map,
17239                 unsigned int *purgeable_count,
17240                 unsigned int *wired_count,
17241                 unsigned int *clean_count,
17242                 unsigned int *dirty_count,
17243                 __unused unsigned int dirty_budget,
17244                 boolean_t *has_shared)
17245 {
17246         vm_map_entry_t  entry2 = VM_MAP_ENTRY_NULL;
17247         kern_return_t   kr = KERN_SUCCESS;
17248
17249         *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
17250         *has_shared = FALSE;
17251
17252         /*
17253          * We need the exclusive lock here so that we can
17254          * block any page faults or lookups while we are
17255          * in the middle of freezing this vm map.
17256          */
17257         vm_map_lock(map);
17258
17259         assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
17260
17261         if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
17262                 kr = KERN_NO_SPACE;
17263                 goto done;
17264         }
17265
17266         c_freezer_compression_count = 0;
17267         clock_get_uptime(&c_freezer_last_yield_ts);
17268
17269         for (entry2 = vm_map_first_entry(map);
17270              entry2 != vm_map_to_entry(map);
17271              entry2 = entry2->vme_next) {
17272
17273                 vm_object_t     src_object = VME_OBJECT(entry2);
17274
17275                 if (src_object &&
17276                     !entry2->is_sub_map &&
17277                     !src_object->phys_contiguous) {
17278                         /* If eligible, scan the entry, moving eligible pages over to our parent object */
17279
17280                         if (src_object->internal == TRUE) {
17281
17282                                 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
17283                                         /*
17284                                          * Pages belonging to this object could be swapped to disk.
17285                                          * Make sure it's not a shared object because we could end
17286                                          * up just bringing it back in again.
17287                                          */
17288                                         if (src_object->ref_count > 1) {
17289                                                 continue;
17290                                         }
17291                                 }
17292                                 vm_object_compressed_freezer_pageout(src_object);
17293
17294                                 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
17295                                         kr = KERN_NO_SPACE;
17296                                         break;
17297                                 }
17298                         }
17299                 }
17300         }
17301 done:
17302         vm_map_unlock(map);
17303
17304         vm_object_compressed_freezer_done();
17305
17306         if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
17307                 /*
17308                  * reset the counter tracking the # of swapped c_segs
17309                  * because we are now done with this freeze session and task.
17310                  */
17311                 c_freezer_swapout_count = 0;
17312         }
17313         return kr;
17314 }
17315
17316 #endif
17317
17318 /*
17319  * vm_map_entry_should_cow_for_true_share:
17320  *
17321  * Determines if the map entry should be clipped and setup for copy-on-write
17322  * to avoid applying "true_share" to a large VM object when only a subset is
17323  * targeted.
17324  *
17325  * For now, we target only the map entries created for the Objective C
17326  * Garbage Collector, which initially have the following properties:
17327  *      - alias == VM_MEMORY_MALLOC
17328  *      - wired_count == 0
17329  *      - !needs_copy
17330  * and a VM object with:
17331  *      - internal
17332  *      - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
17333  *      - !true_share
17334  *      - vo_size == ANON_CHUNK_SIZE
17335  *
17336  * Only non-kernel map entries.
17337  */
17338 boolean_t
17339 vm_map_entry_should_cow_for_true_share(
17340         vm_map_entry_t  entry)
17341 {
17342         vm_object_t     object;
17343
17344         if (entry->is_sub_map) {
17345                 /* entry does not point at a VM object */
17346                 return FALSE;
17347         }
17348
17349         if (entry->needs_copy) {
17350                 /* already set for copy_on_write: done! */
17351                 return FALSE;
17352         }
17353
17354         if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
17355             VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
17356                 /* not a malloc heap or Obj-C Garbage Collector heap */
17357                 return FALSE;
17358         }
17359
17360         if (entry->wired_count) {
17361                 /* wired: can't change the map entry... */
17362                 vm_counters.should_cow_but_wired++;
17363                 return FALSE;
17364         }
17365
17366         object = VME_OBJECT(entry);
17367
17368         if (object == VM_OBJECT_NULL) {
17369                 /* no object yet... */
17370                 return FALSE;
17371         }
17372
17373         if (!object->internal) {
17374                 /* not an internal object */
17375                 return FALSE;
17376         }
17377
17378         if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
17379                 /* not the default copy strategy */
17380                 return FALSE;
17381         }
17382
17383         if (object->true_share) {
17384                 /* already true_share: too late to avoid it */
17385                 return FALSE;
17386         }
17387
17388         if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
17389             object->vo_size != ANON_CHUNK_SIZE) {
17390                 /* ... not an object created for the ObjC Garbage Collector */
17391                 return FALSE;
17392         }
17393
17394         if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
17395             object->vo_size != 2048 * 4096) {
17396                 /* ... not a "MALLOC_SMALL" heap */
17397                 return FALSE;
17398         }
17399
17400         /*
17401          * All the criteria match: we have a large object being targeted for "true_share".
17402          * To limit the adverse side-effects linked with "true_share", tell the caller to
17403          * try and avoid setting up the entire object for "true_share" by clipping the
17404          * targeted range and setting it up for copy-on-write.
17405          */
17406         return TRUE;
17407 }
17408
17409 vm_map_offset_t
17410 vm_map_round_page_mask(
17411         vm_map_offset_t offset,
17412         vm_map_offset_t mask)
17413 {
17414         return VM_MAP_ROUND_PAGE(offset, mask);
17415 }
17416
17417 vm_map_offset_t
17418 vm_map_trunc_page_mask(
17419         vm_map_offset_t offset,
17420         vm_map_offset_t mask)
17421 {
17422         return VM_MAP_TRUNC_PAGE(offset, mask);
17423 }
17424
17425 boolean_t
17426 vm_map_page_aligned(
17427         vm_map_offset_t offset,
17428         vm_map_offset_t mask)
17429 {
17430         return ((offset) & mask) == 0;
17431 }
17432
17433 int
17434 vm_map_page_shift(
17435         vm_map_t map)
17436 {
17437         return VM_MAP_PAGE_SHIFT(map);
17438 }
17439
17440 int
17441 vm_map_page_size(
17442         vm_map_t map)
17443 {
17444         return VM_MAP_PAGE_SIZE(map);
17445 }
17446
17447 vm_map_offset_t
17448 vm_map_page_mask(
17449         vm_map_t map)
17450 {
17451         return VM_MAP_PAGE_MASK(map);
17452 }
17453
17454 kern_return_t
17455 vm_map_set_page_shift(
17456         vm_map_t        map,
17457         int             pageshift)
17458 {
17459         if (map->hdr.nentries != 0) {
17460                 /* too late to change page size */
17461                 return KERN_FAILURE;
17462         }
17463
17464         map->hdr.page_shift = pageshift;
17465
17466         return KERN_SUCCESS;
17467 }
17468
17469 kern_return_t
17470 vm_map_query_volatile(
17471         vm_map_t        map,
17472         mach_vm_size_t  *volatile_virtual_size_p,
17473         mach_vm_size_t  *volatile_resident_size_p,
17474         mach_vm_size_t  *volatile_compressed_size_p,
17475         mach_vm_size_t  *volatile_pmap_size_p,
17476         mach_vm_size_t  *volatile_compressed_pmap_size_p)
17477 {
17478         mach_vm_size_t  volatile_virtual_size;
17479         mach_vm_size_t  volatile_resident_count;
17480         mach_vm_size_t  volatile_compressed_count;
17481         mach_vm_size_t  volatile_pmap_count;
17482         mach_vm_size_t  volatile_compressed_pmap_count;
17483         mach_vm_size_t  resident_count;
17484         vm_map_entry_t  entry;
17485         vm_object_t     object;
17486
17487         /* map should be locked by caller */
17488
17489         volatile_virtual_size = 0;
17490         volatile_resident_count = 0;
17491         volatile_compressed_count = 0;
17492         volatile_pmap_count = 0;
17493         volatile_compressed_pmap_count = 0;
17494
17495         for (entry = vm_map_first_entry(map);
17496              entry != vm_map_to_entry(map);
17497              entry = entry->vme_next) {
17498                 mach_vm_size_t  pmap_resident_bytes, pmap_compressed_bytes;
17499
17500                 if (entry->is_sub_map) {
17501                         continue;
17502                 }
17503                 if (! (entry->protection & VM_PROT_WRITE)) {
17504                         continue;
17505                 }
17506                 object = VME_OBJECT(entry);
17507                 if (object == VM_OBJECT_NULL) {
17508                         continue;
17509                 }
17510                 if (object->purgable != VM_PURGABLE_VOLATILE &&
17511                     object->purgable != VM_PURGABLE_EMPTY) {
17512                         continue;
17513                 }
17514                 if (VME_OFFSET(entry)) {
17515                         /*
17516                          * If the map entry has been split and the object now
17517                          * appears several times in the VM map, we don't want
17518                          * to count the object's resident_page_count more than
17519                          * once.  We count it only for the first one, starting
17520                          * at offset 0 and ignore the other VM map entries.
17521                          */
17522                         continue;
17523                 }
17524                 resident_count = object->resident_page_count;
17525                 if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
17526                         resident_count = 0;
17527                 } else {
17528                         resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
17529                 }
17530
17531                 volatile_virtual_size += entry->vme_end - entry->vme_start;
17532                 volatile_resident_count += resident_count;
17533                 if (object->pager) {
17534                         volatile_compressed_count +=
17535                                 vm_compressor_pager_get_count(object->pager);
17536                 }
17537                 pmap_compressed_bytes = 0;
17538                 pmap_resident_bytes =
17539                         pmap_query_resident(map->pmap,
17540                                             entry->vme_start,
17541                                             entry->vme_end,
17542                                             &pmap_compressed_bytes);
17543                 volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
17544                 volatile_compressed_pmap_count += (pmap_compressed_bytes
17545                                                    / PAGE_SIZE);
17546         }
17547
17548         /* map is still locked on return */
17549
17550         *volatile_virtual_size_p = volatile_virtual_size;
17551         *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
17552         *volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
17553         *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
17554         *volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;
17555
17556         return KERN_SUCCESS;
17557 }
17558
17559 void
17560 vm_map_sizes(vm_map_t map,
17561                 vm_map_size_t * psize,
17562                 vm_map_size_t * pfree,
17563                 vm_map_size_t * plargest_free)
17564 {
17565     vm_map_entry_t  entry;
17566     vm_map_offset_t prev;
17567     vm_map_size_t   free, total_free, largest_free;
17568     boolean_t       end;
17569
17570     if (!map)
17571     {
17572         *psize = *pfree = *plargest_free = 0;
17573         return;
17574     }
17575     total_free = largest_free = 0;
17576
17577     vm_map_lock_read(map);
17578     if (psize) *psize = map->max_offset - map->min_offset;
17579
17580     prev = map->min_offset;
17581     for (entry = vm_map_first_entry(map);; entry = entry->vme_next)
17582     {
17583         end = (entry == vm_map_to_entry(map));
17584
17585         if (end) free = entry->vme_end   - prev;
17586         else     free = entry->vme_start - prev;
17587
17588         total_free += free;
17589         if (free > largest_free) largest_free = free;
17590
17591         if (end) break;
17592         prev = entry->vme_end;
17593     }
17594     vm_map_unlock_read(map);
17595     if (pfree)         *pfree = total_free;
17596     if (plargest_free) *plargest_free = largest_free;
17597 }
17598
17599 #if VM_SCAN_FOR_SHADOW_CHAIN
17600 int vm_map_shadow_max(vm_map_t map);
17601 int vm_map_shadow_max(
17602         vm_map_t map)
17603 {
17604         int             shadows, shadows_max;
17605         vm_map_entry_t  entry;
17606         vm_object_t     object, next_object;
17607
17608         if (map == NULL)
17609                 return 0;
17610
17611         shadows_max = 0;
17612
17613         vm_map_lock_read(map);
17614
17615         for (entry = vm_map_first_entry(map);
17616              entry != vm_map_to_entry(map);
17617              entry = entry->vme_next) {
17618                 if (entry->is_sub_map) {
17619                         continue;
17620                 }
17621                 object = VME_OBJECT(entry);
17622                 if (object == NULL) {
17623                         continue;
17624                 }
17625                 vm_object_lock_shared(object);
17626                 for (shadows = 0;
17627                      object->shadow != NULL;
17628                      shadows++, object = next_object) {
17629                         next_object = object->shadow;
17630                         vm_object_lock_shared(next_object);
17631                         vm_object_unlock(object);
17632                 }
17633                 vm_object_unlock(object);
17634                 if (shadows > shadows_max) {
17635                         shadows_max = shadows;
17636                 }
17637         }
17638
17639         vm_map_unlock_read(map);
17640
17641         return shadows_max;
17642 }
17643 #endif /* VM_SCAN_FOR_SHADOW_CHAIN */
17644
17645 void vm_commit_pagezero_status(vm_map_t lmap) {
17646         pmap_advise_pagezero_range(lmap->pmap, lmap->min_offset);
17647 }
17648
17649 #if __x86_64__
17650 void
17651 vm_map_set_high_start(
17652         vm_map_t        map,
17653         vm_map_offset_t high_start)
17654 {
17655         map->vmmap_high_start = high_start;
17656 }
17657 #endif /* __x86_64__ */